Skip to content

Commit

Permalink
merge main branch & resolve conflicts (#3)
Browse files Browse the repository at this point in the history
* Adding integ tests for scenario of hybrid query with aggregations (opensearch-project#632)

* Adding tests and params to ignore tests if needed

Signed-off-by: Martin Gaievski <[email protected]>

* [BUG FIX] Fix bwc failure in neural sparse search (opensearch-project#696)

* fix comments

Signed-off-by: zhichao-aws <[email protected]>

---------

Signed-off-by: Martin Gaievski <[email protected]>
Signed-off-by: zhichao-aws <[email protected]>
Co-authored-by: Martin Gaievski <[email protected]>
  • Loading branch information
zhichao-aws and martin-gaievski authored Apr 22, 2024
1 parent 0aa1a44 commit 17d7916
Show file tree
Hide file tree
Showing 12 changed files with 2,155 additions and 15 deletions.
71 changes: 71 additions & 0 deletions .github/workflows/test_aggregations.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
name: Run Additional Tests for Neural Search
on:
schedule:
- cron: '0 0 * * *' # every night
push:
branches:
- "*"
- "feature/**"
pull_request:
branches:
- "*"
- "feature/**"

jobs:
Get-CI-Image-Tag:
uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main
with:
product: opensearch

Check-neural-search-linux:
needs: Get-CI-Image-Tag
strategy:
matrix:
java: [11, 17, 21]
os: [ubuntu-latest]

name: Integ Tests Linux
runs-on: ${{ matrix.os }}
container:
# using the same image which is used by opensearch-build team to build the OpenSearch Distribution
# this image tag is subject to change as more dependencies and updates will arrive over time
image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
# need to switch to root so that github actions can install runner binary on container without permission issues.
options: --user root


steps:
- name: Checkout neural-search
uses: actions/checkout@v1

- name: Setup Java ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}

- name: Run tests
run: |
chown -R 1000:1000 `pwd`
su `id -un 1000` -c "./gradlew ':integTest' -Dtest_aggs=true --tests \"org.opensearch.neuralsearch.query.aggregation.*IT\""
Check-neural-search-windows:
strategy:
matrix:
java: [11, 17, 21]
os: [windows-latest]

name: Integ Tests Windows
runs-on: ${{ matrix.os }}

steps:
- name: Checkout neural-search
uses: actions/checkout@v1

- name: Setup Java ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}

- name: Run tests
run: |
./gradlew ':integTest' -Dtest_aggs=true --tests "org.opensearch.neuralsearch.query.aggregation.*IT"
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix async actions are left in neural_sparse query ([#438](https://github.com/opensearch-project/neural-search/pull/438))
- Fix typo for sparse encoding processor factory([#578](https://github.com/opensearch-project/neural-search/pull/578))
- Add non-null check for queryBuilder in NeuralQueryEnricherProcessor ([#615](https://github.com/opensearch-project/neural-search/pull/615))
- Add max_token_score field placeholder in NeuralSparseQueryBuilder to fix the rolling-upgrade from 2.x nodes bwc tests. ([#696](https://github.com/opensearch-project/neural-search/pull/696))
### Infrastructure
- Adding integration tests for scenario of hybrid query with aggregations ([#632](https://github.com/opensearch-project/neural-search/pull/632))
### Documentation
### Maintenance
### Refactoring
Expand Down
5 changes: 5 additions & 0 deletions DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ Additionally, to run integration tests on multi nodes with security enabled, run
./gradlew :integTest -Dsecurity.enabled=true -PnumNodes=3
```

Some integration tests are skipped by default, mainly to save time and resources. A special parameter is required to include those tests in the executed test suite. For example, the following command enables additional tests for aggregations when they are bundled with hybrid queries
```
./gradlew :integTest -PnumNodes=3 -Dtest_aggs=true
```

Integration tests can be run with remote cluster. For that run the following command and replace host/port/cluster name values with ones for the target cluster:

```
Expand Down
6 changes: 6 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,12 @@ task integTest(type: RestIntegTestTask) {
description = "Run tests against a cluster"
testClassesDirs = sourceSets.test.output.classesDirs
classpath = sourceSets.test.runtimeClasspath
boolean runCompleteAggsTestSuite = Boolean.parseBoolean(System.getProperty('test_aggs', "false"))
if (!runCompleteAggsTestSuite) {
filter {
excludeTestsMatching "org.opensearch.neuralsearch.query.aggregation.*IT"
}
}
}
tasks.named("check").configure { dependsOn(integTest) }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/**
* Implementation of Query interface for type NeuralSparseQuery when TwoPhaseNeuralSparse Enabled.
* Initialized, it currentQuery include all tokenQuery. After
* Initialized, it currentQuery include all tokenQuery. After
*/
@AllArgsConstructor
@Getter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
import lombok.extern.log4j.Log4j2;

/**
* SparseEncodingQueryBuilder is responsible for handling "neural_sparse" query types. It uses an ML SPARSE_ENCODING model
* SparseEncodingQueryBuilder is responsible for handling "neural_sparse" query types. It uses an ML NEURAL_SPARSE model
* or SPARSE_TOKENIZE model to produce a Map with String keys and Float values for input text. Then it will be transformed
* to Lucene FeatureQuery wrapped by Lucene BooleanQuery.
*/
Expand All @@ -68,10 +68,16 @@ public class NeuralSparseQueryBuilder extends AbstractQueryBuilder<NeuralSparseQ
static final ParseField QUERY_TOKENS_FIELD = new ParseField("query_tokens");
@VisibleForTesting
static final ParseField MODEL_ID_FIELD = new ParseField("model_id");
// We use max_token_score field to help WAND scorer prune query clause in lucene 9.7. But in lucene 9.8 the inner
// logics change, this field is not needed any more.
@VisibleForTesting
@Deprecated
static final ParseField MAX_TOKEN_SCORE_FIELD = new ParseField("max_token_score").withAllDeprecated();
private static MLCommonsClientAccessor ML_CLIENT;
private String fieldName;
private String queryText;
private String modelId;
private Float maxTokenScore;
private Supplier<Map<String, Float>> queryTokensSupplier;
private NeuralSparseTwoPhaseParameters neuralSparseTwoPhaseParameters;
private static final Version MINIMAL_SUPPORTED_VERSION_DEFAULT_MODEL_ID = Version.V_2_13_0;
Expand All @@ -95,6 +101,7 @@ public NeuralSparseQueryBuilder(StreamInput in) throws IOException {
} else {
this.modelId = in.readString();
}
this.maxTokenScore = in.readOptionalFloat();
if (in.readBoolean()) {
Map<String, Float> queryTokens = in.readMap(StreamInput::readString, StreamInput::readFloat);
this.queryTokensSupplier = () -> queryTokens;
Expand Down Expand Up @@ -123,6 +130,7 @@ protected void doWriteTo(StreamOutput out) throws IOException {
} else {
out.writeString(StringUtils.defaultString(this.modelId, StringUtils.EMPTY));
}
out.writeOptionalFloat(maxTokenScore);
if (!Objects.isNull(this.queryTokensSupplier) && !Objects.isNull(this.queryTokensSupplier.get())) {
out.writeBoolean(true);
out.writeMap(this.queryTokensSupplier.get(), StreamOutput::writeString, StreamOutput::writeFloat);
Expand All @@ -144,6 +152,7 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
if (Objects.nonNull(modelId)) {
xContentBuilder.field(MODEL_ID_FIELD.getPreferredName(), modelId);
}
if (Objects.nonNull(maxTokenScore)) xContentBuilder.field(MAX_TOKEN_SCORE_FIELD.getPreferredName(), maxTokenScore);
if (Objects.nonNull(neuralSparseTwoPhaseParameters)) {
neuralSparseTwoPhaseParameters.doXContent(xContentBuilder);
}
Expand All @@ -159,7 +168,8 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws
* The expected parsing form looks like:
* "SAMPLE_FIELD": {
* "query_text": "string",
* "model_id": "string"
* "model_id": "string",
* "max_token_score": float (optional)
* }
*
* or
Expand Down Expand Up @@ -277,6 +287,8 @@ private static void parseQueryParams(XContentParser parser, NeuralSparseQueryBui
sparseEncodingQueryBuilder.queryText(parser.text());
} else if (MODEL_ID_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
sparseEncodingQueryBuilder.modelId(parser.text());
} else if (MAX_TOKEN_SCORE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
sparseEncodingQueryBuilder.maxTokenScore(parser.floatValue());
} else {
throw new ParsingException(
parser.getTokenLocation(),
Expand Down Expand Up @@ -320,6 +332,7 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws
return new NeuralSparseQueryBuilder().fieldName(fieldName)
.queryText(queryText)
.modelId(modelId)
.maxTokenScore(maxTokenScore)
.queryTokensSupplier(queryTokensSetOnce::get)
.neuralSparseTwoPhaseParameters(neuralSparseTwoPhaseParameters);
}
Expand Down Expand Up @@ -377,12 +390,13 @@ private static void validateQueryTokens(Map<String, Float> queryTokens) {
protected boolean doEquals(NeuralSparseQueryBuilder obj) {
if (this == obj) return true;
if (Objects.isNull(obj) || getClass() != obj.getClass()) return false;
if (Objects.isNull(queryTokensSupplier) && !Objects.isNull(obj.queryTokensSupplier)) return false;
if (!Objects.isNull(queryTokensSupplier) && Objects.isNull(obj.queryTokensSupplier)) return false;
if (Objects.isNull(queryTokensSupplier) && Objects.nonNull(obj.queryTokensSupplier)) return false;
if (Objects.nonNull(queryTokensSupplier) && Objects.isNull(obj.queryTokensSupplier)) return false;
EqualsBuilder equalsBuilder = new EqualsBuilder().append(fieldName, obj.fieldName)
.append(queryText, obj.queryText)
.append(modelId, obj.modelId);
if (!Objects.isNull(queryTokensSupplier)) {
.append(modelId, obj.modelId)
.append(maxTokenScore, obj.maxTokenScore);
if (Objects.nonNull(queryTokensSupplier)) {
equalsBuilder.append(queryTokensSupplier.get(), obj.queryTokensSupplier.get());
}
equalsBuilder.append(neuralSparseTwoPhaseParameters, obj.neuralSparseTwoPhaseParameters);
Expand All @@ -391,8 +405,8 @@ protected boolean doEquals(NeuralSparseQueryBuilder obj) {

@Override
protected int doHashCode() {
HashCodeBuilder builder = new HashCodeBuilder().append(fieldName).append(queryText).append(modelId);
if (!Objects.isNull(queryTokensSupplier)) {
HashCodeBuilder builder = new HashCodeBuilder().append(fieldName).append(queryText).append(modelId).append(maxTokenScore);
if (queryTokensSupplier != null) {
builder.append(queryTokensSupplier.get());
}
if (Objects.nonNull(neuralSparseTwoPhaseParameters)) {
Expand Down
Loading

0 comments on commit 17d7916

Please sign in to comment.