-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Backport 2.x] Support default model id in neural_sparse query (#637)
* [FEATURE] support default model id in neural_sparse query (#614) * feature: implement default model id for neural sparse Signed-off-by: zhichao-aws <[email protected]> * feature: implement default model id for neural sparse Signed-off-by: zhichao-aws <[email protected]> * add ut Signed-off-by: zhichao-aws <[email protected]> * add ut it Signed-off-by: zhichao-aws <[email protected]> * add changelog Signed-off-by: zhichao-aws <[email protected]> * nit Signed-off-by: zhichao-aws <[email protected]> * fix ingest pipeline in it Signed-off-by: zhichao-aws <[email protected]> * add it for bwc restart-upgrade Signed-off-by: zhichao-aws <[email protected]> * add it for bwc restart-upgrade Signed-off-by: zhichao-aws <[email protected]> * add it for bwc restart-upgrade Signed-off-by: zhichao-aws <[email protected]> * add it for bwc restart-upgrade Signed-off-by: zhichao-aws <[email protected]> * fix undeploy with retry Signed-off-by: zhichao-aws <[email protected]> * add it for bwc restart-upgrade Signed-off-by: zhichao-aws <[email protected]> * add it for bwc restart-upgrade Signed-off-by: zhichao-aws <[email protected]> * optimize it code structure Signed-off-by: zhichao-aws <[email protected]> * add it for bwc rolling-upgrade Signed-off-by: zhichao-aws <[email protected]> * tidy Signed-off-by: zhichao-aws <[email protected]> * update index mapping in it Signed-off-by: zhichao-aws <[email protected]> * nit Signed-off-by: zhichao-aws <[email protected]> * move version check to build script Signed-off-by: zhichao-aws <[email protected]> * resolve modelId Signed-off-by: zhichao-aws <[email protected]> * nit Signed-off-by: zhichao-aws <[email protected]> * update init model id Signed-off-by: zhichao-aws <[email protected]> * modify versions check logic in bwc test Signed-off-by: zhichao-aws <[email protected]> * add comments Signed-off-by: zhichao-aws <[email protected]> * nit Signed-off-by: zhichao-aws <[email protected]> * updates for comments Signed-off-by: zhichao-aws 
<[email protected]> --------- Signed-off-by: zhichao-aws <[email protected]> (cherry picked from commit e41fba7) * resolve conflicts Signed-off-by: zhichao-aws <[email protected]> * spotless Apply Signed-off-by: zhichao-aws <[email protected]> * add dependency Signed-off-by: zhichao-aws <[email protected]> * update build.gradle Signed-off-by: zhichao-aws <[email protected]> --------- Signed-off-by: zhichao-aws <[email protected]>
- Loading branch information
1 parent
4c1d3a3
commit 7e57f65
Showing
16 changed files
with
544 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
113 changes: 113 additions & 0 deletions
113
...upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralQueryEnricherProcessorIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.bwc; | ||
|
||
import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER; | ||
import static org.opensearch.neuralsearch.TestUtils.SPARSE_ENCODING_PROCESSOR; | ||
import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR; | ||
|
||
import org.opensearch.common.settings.Settings; | ||
import org.opensearch.neuralsearch.TestUtils; | ||
import org.opensearch.neuralsearch.query.NeuralQueryBuilder; | ||
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder; | ||
|
||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.List; | ||
|
||
public class NeuralQueryEnricherProcessorIT extends AbstractRestartUpgradeRestTestCase { | ||
// add prefix to avoid conflicts with other IT class, since we don't wipe resources after first round | ||
private static final String SPARSE_INGEST_PIPELINE_NAME = "nqep-nlp-ingest-pipeline-sparse"; | ||
private static final String DENSE_INGEST_PIPELINE_NAME = "nqep-nlp-ingest-pipeline-dense"; | ||
private static final String SPARSE_SEARCH_PIPELINE_NAME = "nqep-nlp-search-pipeline-sparse"; | ||
private static final String DENSE_SEARCH_PIPELINE_NAME = "nqep-nlp-search-pipeline-dense"; | ||
private static final String TEST_ENCODING_FIELD = "passage_embedding"; | ||
private static final String TEST_TEXT_FIELD = "passage_text"; | ||
private static final String TEXT_1 = "Hello world a b"; | ||
|
||
// Test restart-upgrade neural_query_enricher in restart-upgrade scenario | ||
public void testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow() throws Exception { | ||
waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
NeuralSparseQueryBuilder sparseEncodingQueryBuilderWithoutModelId = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD) | ||
.queryText(TEXT_1); | ||
// will set the model_id after we obtain the id | ||
NeuralSparseQueryBuilder sparseEncodingQueryBuilderWithModelId = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD) | ||
.queryText(TEXT_1); | ||
|
||
if (isRunningAgainstOldCluster()) { | ||
String modelId = uploadSparseEncodingModel(); | ||
loadModel(modelId); | ||
sparseEncodingQueryBuilderWithModelId.modelId(modelId); | ||
createPipelineForSparseEncodingProcessor(modelId, SPARSE_INGEST_PIPELINE_NAME); | ||
createIndexWithConfiguration( | ||
getIndexNameForTest(), | ||
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())), | ||
SPARSE_INGEST_PIPELINE_NAME | ||
); | ||
|
||
addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1)); | ||
|
||
createSearchRequestProcessor(modelId, SPARSE_SEARCH_PIPELINE_NAME); | ||
updateIndexSettings( | ||
getIndexNameForTest(), | ||
Settings.builder().put("index.search.default_pipeline", SPARSE_SEARCH_PIPELINE_NAME) | ||
); | ||
} else { | ||
String modelId = null; | ||
try { | ||
modelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR); | ||
loadModel(modelId); | ||
sparseEncodingQueryBuilderWithModelId.modelId(modelId); | ||
assertEquals( | ||
search(getIndexNameForTest(), sparseEncodingQueryBuilderWithoutModelId, 1).get("hits"), | ||
search(getIndexNameForTest(), sparseEncodingQueryBuilderWithModelId, 1).get("hits") | ||
); | ||
} finally { | ||
wipeOfTestResources(getIndexNameForTest(), SPARSE_INGEST_PIPELINE_NAME, modelId, SPARSE_SEARCH_PIPELINE_NAME); | ||
} | ||
} | ||
} | ||
|
||
public void testNeuralQueryEnricherProcessor_NeuralSearch_E2EFlow() throws Exception { | ||
waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
NeuralQueryBuilder neuralQueryBuilderWithoutModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1); | ||
NeuralQueryBuilder neuralQueryBuilderWithModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1); | ||
|
||
if (isRunningAgainstOldCluster()) { | ||
String modelId = uploadTextEmbeddingModel(); | ||
loadModel(modelId); | ||
neuralQueryBuilderWithModelId.modelId(modelId); | ||
createPipelineProcessor(modelId, DENSE_INGEST_PIPELINE_NAME); | ||
createIndexWithConfiguration( | ||
getIndexNameForTest(), | ||
Files.readString(Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())), | ||
DENSE_INGEST_PIPELINE_NAME | ||
); | ||
|
||
addDocument(getIndexNameForTest(), "0", TEST_TEXT_FIELD, TEXT_1, null, null); | ||
|
||
createSearchRequestProcessor(modelId, DENSE_SEARCH_PIPELINE_NAME); | ||
updateIndexSettings(getIndexNameForTest(), Settings.builder().put("index.search.default_pipeline", DENSE_SEARCH_PIPELINE_NAME)); | ||
assertEquals( | ||
search(getIndexNameForTest(), neuralQueryBuilderWithoutModelId, 1).get("hits"), | ||
search(getIndexNameForTest(), neuralQueryBuilderWithModelId, 1).get("hits") | ||
); | ||
} else { | ||
String modelId = null; | ||
try { | ||
modelId = TestUtils.getModelId(getIngestionPipeline(DENSE_INGEST_PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR); | ||
loadModel(modelId); | ||
neuralQueryBuilderWithModelId.modelId(modelId); | ||
|
||
assertEquals( | ||
search(getIndexNameForTest(), neuralQueryBuilderWithoutModelId, 1).get("hits"), | ||
search(getIndexNameForTest(), neuralQueryBuilderWithModelId, 1).get("hits") | ||
); | ||
} finally { | ||
wipeOfTestResources(getIndexNameForTest(), DENSE_INGEST_PIPELINE_NAME, modelId, DENSE_SEARCH_PIPELINE_NAME); | ||
} | ||
} | ||
} | ||
} |
11 changes: 11 additions & 0 deletions
11
qa/restart-upgrade/src/test/resources/processor/SearchRequestPipelineConfiguration.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"request_processors": [ | ||
{ | ||
"neural_query_enricher": { | ||
"tag": "tag1", | ||
"description": "This processor supplies a default model id to neural queries that do not specify one", | ||
"default_model_id": "%s" | ||
} | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.