Skip to content

Commit

Permalink
[FEATURE] support default model id in neural_sparse query (opensearch…
Browse files Browse the repository at this point in the history
…-project#614)

* feature: implement default model id for neural sparse

Signed-off-by: zhichao-aws <[email protected]>

* feature: implement default model id for neural sparse

Signed-off-by: zhichao-aws <[email protected]>

* add ut

Signed-off-by: zhichao-aws <[email protected]>

* add ut it

Signed-off-by: zhichao-aws <[email protected]>

* add changelog

Signed-off-by: zhichao-aws <[email protected]>

* nit

Signed-off-by: zhichao-aws <[email protected]>

* fix ingest pipeline in it

Signed-off-by: zhichao-aws <[email protected]>

* add it for bwc restart-upgrade

Signed-off-by: zhichao-aws <[email protected]>

* add it for bwc restart-upgrade

Signed-off-by: zhichao-aws <[email protected]>

* add it for bwc restart-upgrade

Signed-off-by: zhichao-aws <[email protected]>

* add it for bwc restart-upgrade

Signed-off-by: zhichao-aws <[email protected]>

* fix undeploy with retry

Signed-off-by: zhichao-aws <[email protected]>

* add it for bwc restart-upgrade

Signed-off-by: zhichao-aws <[email protected]>

* add it for bwc restart-upgrade

Signed-off-by: zhichao-aws <[email protected]>

* optimize it code structure

Signed-off-by: zhichao-aws <[email protected]>

* add it for bwc rolling-upgrade

Signed-off-by: zhichao-aws <[email protected]>

* tidy

Signed-off-by: zhichao-aws <[email protected]>

* update index mapping in it

Signed-off-by: zhichao-aws <[email protected]>

* nit

Signed-off-by: zhichao-aws <[email protected]>

* move version check to build script

Signed-off-by: zhichao-aws <[email protected]>

* resolve modelId

Signed-off-by: zhichao-aws <[email protected]>

* nit

Signed-off-by: zhichao-aws <[email protected]>

* update init model id

Signed-off-by: zhichao-aws <[email protected]>

* modify versions check logic in bwc test

Signed-off-by: zhichao-aws <[email protected]>

* add comments

Signed-off-by: zhichao-aws <[email protected]>

* nit

Signed-off-by: zhichao-aws <[email protected]>

* updates for comments

Signed-off-by: zhichao-aws <[email protected]>

---------

Signed-off-by: zhichao-aws <[email protected]>
(cherry picked from commit e41fba7)
  • Loading branch information
zhichao-aws committed Mar 14, 2024
1 parent 78fa7e4 commit fbd5ec6
Show file tree
Hide file tree
Showing 15 changed files with 544 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.12...2.x)
### Features
- Enabled support for applying default modelId in neural sparse query ([#614](https://github.com/opensearch-project/neural-search/pull/614))
### Enhancements
- Adding aggregations in hybrid query ([#630](https://github.com/opensearch-project/neural-search/pull/630))
- Support for post filter in hybrid query ([#633](https://github.com/opensearch-project/neural-search/pull/633))
Expand Down
22 changes: 20 additions & 2 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,21 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down Expand Up @@ -98,12 +107,21 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.is_old_cluster', 'false'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.SPARSE_ENCODING_PROCESSOR;
import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.TestUtils;
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

/**
 * Restart-upgrade backward-compatibility tests for searches that rely on a search pipeline
 * (created through {@code createSearchRequestProcessor}) to supply the model id.
 *
 * Each test runs twice: once against the old cluster, where the model, ingest pipeline, index,
 * document and search pipeline are created, and once against the upgraded cluster, where a query
 * without a model id is compared to the same query with an explicit model id and the test
 * resources are wiped.
 */
public class NeuralQueryEnricherProcessorIT extends AbstractRestartUpgradeRestTestCase {
    // add prefix to avoid conflicts with other IT class, since we don't wipe resources after first round
    private static final String SPARSE_INGEST_PIPELINE_NAME = "nqep-nlp-ingest-pipeline-sparse";
    private static final String DENSE_INGEST_PIPELINE_NAME = "nqep-nlp-ingest-pipeline-dense";
    private static final String SPARSE_SEARCH_PIPELINE_NAME = "nqep-nlp-search-pipeline-sparse";
    private static final String DENSE_SEARCH_PIPELINE_NAME = "nqep-nlp-search-pipeline-dense";
    private static final String TEST_ENCODING_FIELD = "passage_embedding";
    private static final String TEST_TEXT_FIELD = "passage_text";
    private static final String TEXT_1 = "Hello world a b";

    /**
     * Neural sparse variant of the restart-upgrade scenario.
     *
     * Old cluster: uploads and loads a sparse encoding model, creates the ingest pipeline and index,
     * indexes one document, creates the search pipeline and makes it the index default.
     * Upgraded cluster: resolves the model id from the ingest pipeline and asserts that the query
     * without a model id returns the same "hits" as the query with the explicit model id.
     *
     * @throws Exception if any cluster interaction or resource lookup fails
     */
    public void testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        final NeuralSparseQueryBuilder queryWithoutModelId = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD)
            .queryText(TEXT_1);
        // the model id is filled in below, once it is known
        final NeuralSparseQueryBuilder queryWithModelId = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD)
            .queryText(TEXT_1);

        if (isRunningAgainstOldCluster()) {
            // First round: only set up resources; they are intentionally left in place for the second round.
            final String uploadedModelId = uploadSparseEncodingModel();
            loadModel(uploadedModelId);
            queryWithModelId.modelId(uploadedModelId);
            createPipelineForSparseEncodingProcessor(uploadedModelId, SPARSE_INGEST_PIPELINE_NAME);

            final String sparseIndexMapping = Files.readString(
                Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())
            );
            createIndexWithConfiguration(getIndexNameForTest(), sparseIndexMapping, SPARSE_INGEST_PIPELINE_NAME);

            addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1));

            createSearchRequestProcessor(uploadedModelId, SPARSE_SEARCH_PIPELINE_NAME);
            updateIndexSettings(
                getIndexNameForTest(),
                Settings.builder().put("index.search.default_pipeline", SPARSE_SEARCH_PIPELINE_NAME)
            );
            return;
        }

        // Second round: verify against the upgraded cluster, then clean up regardless of the outcome.
        String resolvedModelId = null;
        try {
            resolvedModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
            loadModel(resolvedModelId);
            queryWithModelId.modelId(resolvedModelId);

            final Object hitsWithoutModelId = search(getIndexNameForTest(), queryWithoutModelId, 1).get("hits");
            final Object hitsWithModelId = search(getIndexNameForTest(), queryWithModelId, 1).get("hits");
            assertEquals(hitsWithoutModelId, hitsWithModelId);
        } finally {
            wipeOfTestResources(getIndexNameForTest(), SPARSE_INGEST_PIPELINE_NAME, resolvedModelId, SPARSE_SEARCH_PIPELINE_NAME);
        }
    }

    /**
     * Dense (text embedding) variant of the restart-upgrade scenario.
     *
     * Old cluster: uploads and loads a text embedding model, creates the ingest pipeline and index,
     * indexes one document, creates the search pipeline, makes it the index default and already
     * asserts that both query forms return the same "hits".
     * Upgraded cluster: resolves the model id from the ingest pipeline and repeats the comparison.
     *
     * @throws Exception if any cluster interaction or resource lookup fails
     */
    public void testNeuralQueryEnricherProcessor_NeuralSearch_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        final NeuralQueryBuilder queryWithoutModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
        final NeuralQueryBuilder queryWithModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);

        if (isRunningAgainstOldCluster()) {
            // First round: set up resources and check the behaviour on the old version as well.
            final String uploadedModelId = uploadTextEmbeddingModel();
            loadModel(uploadedModelId);
            queryWithModelId.modelId(uploadedModelId);
            createPipelineProcessor(uploadedModelId, DENSE_INGEST_PIPELINE_NAME);

            final String denseIndexMapping = Files.readString(
                Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())
            );
            createIndexWithConfiguration(getIndexNameForTest(), denseIndexMapping, DENSE_INGEST_PIPELINE_NAME);

            addDocument(getIndexNameForTest(), "0", TEST_TEXT_FIELD, TEXT_1, null, null);

            createSearchRequestProcessor(uploadedModelId, DENSE_SEARCH_PIPELINE_NAME);
            updateIndexSettings(getIndexNameForTest(), Settings.builder().put("index.search.default_pipeline", DENSE_SEARCH_PIPELINE_NAME));

            final Object hitsWithoutModelId = search(getIndexNameForTest(), queryWithoutModelId, 1).get("hits");
            final Object hitsWithModelId = search(getIndexNameForTest(), queryWithModelId, 1).get("hits");
            assertEquals(hitsWithoutModelId, hitsWithModelId);
            return;
        }

        // Second round: verify against the upgraded cluster, then clean up regardless of the outcome.
        String resolvedModelId = null;
        try {
            resolvedModelId = TestUtils.getModelId(getIngestionPipeline(DENSE_INGEST_PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
            loadModel(resolvedModelId);
            queryWithModelId.modelId(resolvedModelId);

            final Object hitsWithoutModelId = search(getIndexNameForTest(), queryWithoutModelId, 1).get("hits");
            final Object hitsWithModelId = search(getIndexNameForTest(), queryWithModelId, 1).get("hits");
            assertEquals(hitsWithoutModelId, hitsWithModelId);
        } finally {
            wipeOfTestResources(getIndexNameForTest(), DENSE_INGEST_PIPELINE_NAME, resolvedModelId, DENSE_SEARCH_PIPELINE_NAME);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"request_processors": [
{
"neural_query_enricher": {
"tag": "tag1",
"description": "This processor applies the default model id to neural queries that do not specify one",
"default_model_id": "%s"
}
}
]
}
33 changes: 30 additions & 3 deletions qa/rolling-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,21 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version
systemProperty 'tests.skip_delete_model_index', 'true'

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down Expand Up @@ -99,12 +108,21 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down Expand Up @@ -132,12 +150,21 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down
Loading

0 comments on commit fbd5ec6

Please sign in to comment.