From 74d9aef2d3ffff1fd48bf261af06645d62b74582 Mon Sep 17 00:00:00 2001 From: Varun Jain Date: Tue, 26 Dec 2023 11:46:25 -0800 Subject: [PATCH] Semantic Search Signed-off-by: Varun Jain --- CHANGELOG.md | 1 + qa/build.gradle | 3 -- .../{TextSearch.java => SemanticSearch.java} | 32 +++++++++++++---- .../{TextSearch.java => SemanticSearch.java} | 34 +++++++++++++++---- .../neuralsearch/BaseNeuralSearchIT.java | 13 +++++++ .../opensearch/neuralsearch/TestUtils.java | 9 +++++ 6 files changed, 77 insertions(+), 15 deletions(-) rename qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{TextSearch.java => SemanticSearch.java} (68%) rename qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/{TextSearch.java => SemanticSearch.java} (71%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f02272c4..d95b63751 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Enhancements ### Bug Fixes ### Infrastructure +- BWC tests for Neural Search ([#515](https://github.com/opensearch-project/neural-search/pull/515)) ### Documentation ### Maintenance ### Refactoring diff --git a/qa/build.gradle b/qa/build.gradle index a9f3a3eb4..f5ecfb615 100644 --- a/qa/build.gradle +++ b/qa/build.gradle @@ -57,7 +57,6 @@ String default_bwc_version = System.getProperty("bwc.version") String neural_search_bwc_version = System.getProperty("tests.bwc.version", default_bwc_version) boolean isSnapshot = neural_search_bwc_version.contains("-SNAPSHOT") String neural_search_bwc_version_no_qualifier = isSnapshot ? neural_search_bwc_version - "-SNAPSHOT" : neural_search_bwc_version -String knn_bwc_version_no_qualifier = isSnapshot ? neural_search_bwc_version - "-SNAPSHOT" : neural_search_bwc_version String os_platform = "linux" String artifact_type = "tar" @@ -81,7 +80,6 @@ task deletetempDirectories { } task pullMlCommonsBwcPlugin { - //dependsOn "deletetempDirectories" doLast { copy { from(java.nio.file.Path.of(tmp_dir.absolutePath, "opensearch-${neural_search_bwc_version_no_qualifier}", "plugins", "opensearch-ml")) @@ -123,7 +121,6 @@ task pullKnnBwcPlugin { // Task to pull neural search plugin from archive task pullBwcPlugin { - //dependsOn "deletetempDirectories" doLast { copy { from(java.nio.file.Path.of(tmp_dir.absolutePath, "opensearch-${neural_search_bwc_version_no_qualifier}", "plugins", "opensearch-neural-search")) diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextSearch.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearch.java similarity index 68% rename from qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextSearch.java rename to qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearch.java index 286f0b6c6..86b322f8b 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextSearch.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearch.java @@ -9,17 +9,24 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; +import java.util.ArrayList; + import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER; -public class TextSearch extends AbstractRestartUpgradeRestTestCase{ +import static org.opensearch.neuralsearch.TestUtils.getModelId; +import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR; +import org.opensearch.neuralsearch.query.NeuralQueryBuilder; + + +public class SemanticSearch extends AbstractRestartUpgradeRestTestCase{ private static final String PIPELINE_NAME = "nlp-pipeline"; private static String DOC_ID = "0"; - private static final String TEST_FIELD = "test-field"; + private static final String TEST_FIELD = "passage_text"; private static final String TEXT= "Hello world"; - public void testIndex() throws Exception{ + public void testSemanticSearch() throws Exception{ waitForClusterHealthGreen(NODES_BWC_CLUSTER); if (isRunningAgainstOldCluster()){ @@ -33,17 +40,29 @@ public void testIndex() throws Exception{ ); addDocument(testIndex, DOC_ID,TEST_FIELD,TEXT); }else { - validateTestIndex(); + Map pipeline= getIngestionPipeline(PIPELINE_NAME); + assertNotNull(pipeline); + String modelId=getModelId(pipeline, TEXT_EMBEDDING_PROCESSOR); + validateTestIndex(modelId); + deleteIndex(testIndex); } } - private void validateTestIndex() throws Exception { + private void validateTestIndex(String modelId) throws Exception { int docCount=getDocCount(testIndex); assertEquals(1,docCount); - deleteIndex(testIndex); + loadModel(modelId); + NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(); + neuralQueryBuilder.fieldName("passage_embedding"); + neuralQueryBuilder.modelId(modelId); + neuralQueryBuilder.queryText(TEXT); + neuralQueryBuilder.k(1); + Map response = search(testIndex, neuralQueryBuilder, 1); + assertNotNull(response); } + private String uploadTextEmbeddingModel() throws Exception { String requestBody = Files.readString(Path.of(classLoader.getResource("processor/UploadModelRequestBody.json").toURI())); return registerModelGroupAndGetModelId(requestBody); @@ -70,4 +89,5 @@ private Map getTotalHits(Map searchResponseAsMap Map hitsMap = (Map) searchResponseAsMap.get("hits"); return (Map) hitsMap.get("total"); } + } diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextSearch.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearch.java similarity index 71% rename from qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextSearch.java rename to qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearch.java index 0e133ff40..a3f37e72b 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/TextSearch.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/SemanticSearch.java @@ -8,9 +8,14 @@ import com.carrotsearch.randomizedtesting.RandomizedTest; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Map; +import java.util.ArrayList; import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER; +import static org.opensearch.neuralsearch.TestUtils.getModelId; +import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR; +import org.opensearch.neuralsearch.query.NeuralQueryBuilder; -public class TextSearch extends AbstractRollingUpgradeTestCase{ +public class SemanticSearch extends AbstractRollingUpgradeTestCase{ private static final String PIPELINE_NAME = "nlp-pipeline"; private static final String TEST_FIELD = "test-field"; private static final String TEXT= "Hello world"; @@ -18,7 +23,7 @@ public class TextSearch extends AbstractRollingUpgradeTestCase{ private static final String TEXT_UPGRADED= "Hello world upgraded"; private static final int NUM_DOCS = 1; - public void testIndex() throws Exception{ + public void testSemanticSearch() throws Exception{ waitForClusterHealthGreen(NODES_BWC_CLUSTER); switch (getClusterType()){ case OLD: @@ -33,29 +38,40 @@ public void testIndex() throws Exception{ addDocument(testIndex, "0",TEST_FIELD,TEXT); break; case MIXED: + modelId=getModelId(PIPELINE_NAME); int totalDocsCountMixed; if (isFirstMixedRound()){ totalDocsCountMixed=NUM_DOCS; - validateTestIndexOnUpgrade(totalDocsCountMixed); + validateTestIndexOnUpgrade(totalDocsCountMixed, modelId, TEXT); addDocument(testIndex, "1",TEST_FIELD,TEXT_MIXED); + }else{ totalDocsCountMixed=2*NUM_DOCS; - validateTestIndexOnUpgrade(totalDocsCountMixed); + validateTestIndexOnUpgrade(totalDocsCountMixed, modelId, TEXT_MIXED); } break; case UPGRADED: + modelId=getModelId(PIPELINE_NAME); int totalDocsCountUpgraded=3*NUM_DOCS; addDocument(testIndex, "2",TEST_FIELD,TEXT_UPGRADED); - validateTestIndexOnUpgrade(totalDocsCountUpgraded); + validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, TEXT_UPGRADED); deleteIndex(testIndex); break; } } - private void validateTestIndexOnUpgrade(int numberOfDocs) throws Exception { + private void validateTestIndexOnUpgrade(int numberOfDocs, String modelId, String text) throws Exception { int docCount=getDocCount(testIndex); assertEquals(numberOfDocs,docCount); + loadModel(modelId); + NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(); + neuralQueryBuilder.fieldName("passage_embedding"); + neuralQueryBuilder.modelId(modelId); + neuralQueryBuilder.queryText(text); + neuralQueryBuilder.k(1); + Map response = search(testIndex, neuralQueryBuilder, 1); + assertNotNull(response); } private String uploadTextEmbeddingModel() throws Exception { @@ -79,4 +95,10 @@ protected void createPipelineProcessor(String modelId, String pipelineName, Proc String requestBody=Files.readString(Path.of(classLoader.getResource("processor/PipelineConfiguration.json").toURI())); createPipelineProcessor(requestBody,pipelineName,modelId); } + + private String getModelId(String pipelineName){ + Map pipeline = getIngestionPipeline(PIPELINE_NAME); + assertNotNull(pipeline); + return getModelId(pipeline,TEXT_EMBEDDING_PROCESSOR); + } } diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java index 0ccdc58c3..9c9999aba 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java @@ -837,6 +837,19 @@ protected void addDocument(String index, String docId, String fieldName, String assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); } + /** + * Get ingest pipeline + */ + @SneakyThrows + protected Map getIngestionPipeline(String pipelineName) { + Request request = new Request("GET", "/_ingest/pipeline/" + pipelineName); + Response response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); + String responseBody = EntityUtils.toString(response.getEntity()); + Map responseMap = createParser(XContentType.JSON.xContent(), responseBody).map(); + return (Map) responseMap.get(pipelineName); + } + /** * Enumeration for types of pipeline processors, used to lookup resources like create * processor request as those are type specific diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java b/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java index 9b805b227..52b208e5d 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/TestUtils.java @@ -53,6 +53,7 @@ public class TestUtils { public static final String SKIP_DELETE_MODEL_INDEX = "tests.skip_delete_model_index"; public static final String SECURITY_AUDITLOG_PREFIX = "security-auditlog"; public static final String OPENSEARCH_SYSTEM_INDEX_PREFIX = ".opensearch"; + public static final String TEXT_EMBEDDING_PROCESSOR = "text_embedding"; /** * Convert an xContentBuilder to a map @@ -302,4 +303,12 @@ private static Optional getMaxScore(Map searchResponseAsM Map hitsMap = (Map) searchResponseAsMap.get("hits"); return hitsMap.get("max_score") == null ? Optional.empty() : Optional.of(((Double) hitsMap.get("max_score")).floatValue()); } + + public static String getModelId(Map pipeline, String processor) { + ArrayList> processors = (ArrayList>) pipeline.get("processors"); + + Map textEmbeddingProcessor = (Map) processors.get(0).get(processor); + + return (String) textEmbeddingProcessor.get("model_id"); + } }