Skip to content

Commit

Permalink
Merge branch 'main' into graph_size_bug_v2
Browse files Browse the repository at this point in the history
  • Loading branch information
ryanbogan committed Aug 6, 2024
2 parents 1db8365 + df7627c commit e3b1e9d
Show file tree
Hide file tree
Showing 193 changed files with 7,592 additions and 2,561 deletions.
26 changes: 23 additions & 3 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,30 @@ on:
branches:
- "*"
- "feature/**"
paths:
- 'build.gradle'
- 'settings.gradle'
- 'src/**'
- 'build-tools/**'
- 'buildSrc/**'
- 'gradle/**'
- 'jni/**'
- 'micro-benchmarks/**'
- '.github/workflows/CI.yml'
pull_request:
branches:
- "*"
- "feature/**"
paths:
- 'build.gradle'
- 'settings.gradle'
- 'src/**'
- 'build-tools/**'
- 'buildSrc/**'
- 'gradle/**'
- 'jni/**'
- 'micro-benchmarks/**'
- '.github/workflows/CI.yml'
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

Expand All @@ -22,7 +42,7 @@ jobs:
Build-k-NN-Linux:
strategy:
matrix:
java: [11, 17, 21]
java: [21]

name: Build and Test k-NN Plugin on Linux
runs-on: ubuntu-latest
Expand Down Expand Up @@ -71,7 +91,7 @@ jobs:
Build-k-NN-MacOS:
strategy:
matrix:
java: [ 11, 17, 21 ]
java: [ 21 ]

name: Build and Test k-NN Plugin on MacOS
needs: Get-CI-Image-Tag
Expand Down Expand Up @@ -111,7 +131,7 @@ jobs:
Build-k-NN-Windows:
strategy:
matrix:
java: [ 11, 17, 21 ]
java: [ 21 ]

name: Build and Test k-NN Plugin on Windows
needs: Get-CI-Image-Tag
Expand Down
24 changes: 22 additions & 2 deletions .github/workflows/backwards_compatibility_tests_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,36 @@ on:
branches:
- "*"
- "feature/**"
paths:
- 'build.gradle'
- 'settings.gradle'
- 'src/**'
- 'build-tools/**'
- 'buildSrc/**'
- 'gradle/**'
- 'jni/**'
- 'qa/**'
- '.github/workflows/backwards_compatibility_tests_workflow.yml'
pull_request:
branches:
- "*"
- "feature/**"
paths:
- 'build.gradle'
- 'settings.gradle'
- 'src/**'
- 'build-tools/**'
- 'buildSrc/**'
- 'gradle/**'
- 'jni/**'
- 'qa/**'
- '.github/workflows/backwards_compatibility_tests_workflow.yml'

jobs:
Restart-Upgrade-BWCTests-k-NN:
strategy:
matrix:
java: [ 11, 17 ]
java: [ 21 ]
os: [ubuntu-latest]
bwc_version : [ "2.0.1", "2.1.0", "2.2.1", "2.3.0", "2.4.1", "2.5.0", "2.6.0", "2.7.0", "2.8.0", "2.9.0", "2.10.0", "2.11.0", "2.12.0", "2.13.0", "2.14.0", "2.15.0", "2.16.0-SNAPSHOT"]
opensearch_version : [ "3.0.0-SNAPSHOT" ]
Expand Down Expand Up @@ -92,7 +112,7 @@ jobs:
Rolling-Upgrade-BWCTests-k-NN:
strategy:
matrix:
java: [ 11, 17 ]
java: [ 21 ]
os: [ubuntu-latest]
bwc_version: [ "2.16.0-SNAPSHOT" ]
opensearch_version: [ "3.0.0-SNAPSHOT" ]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/maven-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- uses: actions/setup-java@v3
with:
distribution: temurin # Temurin is a distribution of adoptium
java-version: 11
java-version: 21
- uses: actions/checkout@v3
- uses: aws-actions/configure-aws-credentials@v1
with:
Expand Down
18 changes: 18 additions & 0 deletions .github/workflows/test_security.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,28 @@ on:
branches:
- "*"
- "feature/**"
paths:
- 'build.gradle'
- 'settings.gradle'
- 'src/**'
- 'build-tools/**'
- 'buildSrc/**'
- 'gradle/**'
- 'jni/**'
- '.github/workflows/test_security.yml'
pull_request:
branches:
- "*"
- "feature/**"
paths:
- 'build.gradle'
- 'settings.gradle'
- 'src/**'
- 'build-tools/**'
- 'buildSrc/**'
- 'gradle/**'
- 'jni/**'
- '.github/workflows/test_security.yml'
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

Expand Down
2 changes: 1 addition & 1 deletion .idea/copyright/SPDX_ALv2.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 12 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Enhancements
### Bug Fixes
### Infrastructure
* Removed JDK 11 and 17 versions from CI runs [#1921](https://github.com/opensearch-project/k-NN/pull/1921)
### Documentation
### Maintenance
### Refactoring
Expand All @@ -17,8 +18,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Enhancements
### Bug Fixes
* Corrected search logic for scenario with non-existent fields in filter [#1874](https://github.com/opensearch-project/k-NN/pull/1874)
* Fix graph merge stats size calculation [#1844](https://github.com/opensearch-project/k-NN/pull/1844)
* Add script_fields context to KNNAllowlist [#1917](https://github.com/opensearch-project/k-NN/pull/1917)
* Fix graph merge stats size calculation [#1844](https://github.com/opensearch-project/k-NN/pull/1844)
### Infrastructure
### Documentation
### Maintenance
### Refactoring
* Fix a flaky unit test:testMultiFieldsKnnIndex, which was failing due to inconsistent merge behaviors [#1924](https://github.com/opensearch-project/k-NN/pull/1924)
### Refactoring
* Introduce KNNVectorValues interface to iterate on different types of Vector values during indexing and search [#1897](https://github.com/opensearch-project/k-NN/pull/1897)
* Clean up parsing for query [#1824](https://github.com/opensearch-project/k-NN/pull/1824)
* Refactor engine package structure [#1913](https://github.com/opensearch-project/k-NN/pull/1913)
* Refactor method structure and definitions [#1920](https://github.com/opensearch-project/k-NN/pull/1920)
* Refactor KNNVectorFieldType from KNNVectorFieldMapper to a separate class for better readability. [#1931](https://github.com/opensearch-project/k-NN/pull/1931)
* Generalize lib interface to return context objects [#1925](https://github.com/opensearch-project/k-NN/pull/1925)
* Move k search k-NN query to re-write phase of vector search query for Native Engines [#1877](https://github.com/opensearch-project/k-NN/pull/1877)
89 changes: 89 additions & 0 deletions TRIAGING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<img src="https://opensearch.org/assets/img/opensearch-logo-themed.svg" height="64px">

The maintainers of the k-NN/neural-search repos seek to promote an inclusive and engaged community of contributors. In
order to facilitate this, bi-weekly triage meetings are open to all, and attendance is encouraged for anyone who hopes to
contribute, discuss an issue, or learn more about the project. To learn more about contributing to the
k-NN/neural-search repos, visit the [Contributing](./CONTRIBUTING.md) documentation.

### Do I need to attend for my issue to be addressed/triaged?

Attendance is not required for your issue to be triaged or addressed. All new issues are triaged bi-weekly.

### What happens if my issue does not get covered this time?

Each meeting we seek to address all new issues. However, should we run out of time before your issue is discussed, you
are always welcome to attend the next meeting or to follow up on the issue post itself.

### How do I join the Backlog & Triage meeting?

Meetings are hosted bi-weekly on Tuesdays at 5 PM Pacific Time and can be joined via the links posted on the
[OpenSearch Meetup Group](https://www.meetup.com/opensearch/events/) list of events. The event will be titled
`Development Backlog & Triage Meeting - k-NN/neural-search`.

After joining the Chime meeting, you can enable your video / voice to join the discussion. If you do not have a webcam
or microphone available, you can still join in via the text chat.

If you have an issue you'd like to bring forth, please consider getting a link to the issue so it can be presented to
everyone in the meeting.

### Is there an agenda for each week?

Meetings are 60 minutes and structured as follows:

1. Initial Gathering: As we gather, feel free to turn on video and engage in informal and open-to-all conversation. After a bit, a volunteer will share their screen and proceed with the agenda.
2. Announcements: If there are any announcements to be made they will happen at the start of the meeting.
3. Review of New Issues: The meetings always start with reviewing all untriaged/recent issues for the k-NN and neural-search repositories.
4. Member Requests: Opportunity for any meeting member to ask for consideration of an issue or pull request.
5. Pull Request Discussion: Then, we review the status of outstanding pull requests from the k-NN and neural-search repositories.
6. Open Discussion: Allow for members of the meeting to surface any topics without issues filed or pull request created.


There is no specific ordering within each category.

If you have an issue you would like to discuss but do not have the ability to attend the entire meeting, please attend whenever is best for you and signal that you have an issue to discuss when you arrive.

### Do I need to have already contributed to the project to attend a triage meeting?

No, all are welcome and encouraged to attend. Attending the Backlog & Triage meetings is a great way for a new contributor to learn about the project as well as explore different avenues of contribution.

### What if I have an issue that is almost a duplicate, should I open a new one to be triaged?

You can always open an issue, including one that you think may be a duplicate. However, in cases where you believe there
is an important distinction to be made between an existing issue and your newly created one, you are encouraged to
attend the triaging meeting to explain.

### What if I have follow-up questions on an issue?

If you have an existing issue you would like to discuss, you can always comment on the issue itself. Alternatively, you
are welcome to come to the triage meeting to discuss.

### Is this meeting a good place to get help setting up k-NN/neural-search features on my OpenSearch instance?

While we are always happy to help the community, the best resource for implementation questions is [the OpenSearch forum](https://forum.opensearch.org/c/plugins/k-nn/48).

There you can find answers to many common questions as well as speak with implementation experts.

### What are the issue labels associated with triaging?

There are several labels that are used to identify the 'state' of issues filed in the k-NN and neural-search repositories.

| Label | When applied | Meaning |
| ----- | ------------ | ------- |
| Untriaged | When issues are created or re-opened. | Issues labeled as 'Untriaged' require the attention of the repository maintainers and may need to be prioritized for quicker resolution. It's crucial to keep the count of 'Untriaged' labels low to ensure all potential security issues are addressed in a timely manner. See [SECURITY.md](https://github.com/opensearch-project/security/blob/main/SECURITY.md) for more details on handling these issues. |
| Triaged | During triage meetings. | Issues labeled as 'Triaged' have been reviewed and are deemed actionable. Opening a pull request for an issue with the 'Triaged' label has a higher likelihood of approval from the project maintainers, particularly in novel areas. |
| Neither Label | During triage meetings. | This category is for issues that lack sufficient details to formulate a potential solution. Until more details are provided, it's difficult to ascertain if a proposed solution would be acceptable. When dealing with an 'Untriaged' issue that falls into this category, the triage team should provide further insights so the issue can be appropriately closed or labeled as 'Triaged'. Issues in this state are reviewed during every triage meeting. |
| Help Wanted | Anytime. | Issues marked as 'Help Wanted' signal that they are actionable and not the current focus of the project maintainers. Community contributions are especially encouraged for these issues. |
| Good First Issue | Anytime. | Issues labeled as 'Good First Issue' are small in scope and can be resolved with a single pull request. These are recommended starting points for newcomers looking to make their first contributions. |


### What if my issue is critical to OpenSearch operations, do I have to wait for the bi-weekly meeting for it to be addressed?

All new issues for the [k-NN](https://github.com/opensearch-project/k-NN/issues?q=is%3Aissue+is%3Aopen+label%3Auntriaged) repo and [neural-search](https://github.com/opensearch-project/neural-search/issues?q=is%3Aissue+is%3Aopen+-label%3Atriaged) repo are reviewed daily to check for critical issues which require immediate triaging. If an issue relates to a severe concern for OpenSearch operation, it will be triaged by a maintainer mid-week. You can still come to discuss an issue at the following meeting even if it has already been triaged during the week.

### Is this where I should bring up potential security vulnerabilities?

Due to the sensitive nature of security vulnerabilities, please report all potential vulnerabilities directly by following the steps outlined on the [SECURITY.md](https://github.com/opensearch-project/k-NN/blob/main/SECURITY.md) document.

### Who should I contact if I have further questions?

You can always file an issue for any question you have about the project.
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import org.opensearch.cluster.ClusterModule;
import org.opensearch.common.xcontent.LoggingDeprecationHandler;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.core.common.bytes.BytesArray;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.index.query.TermQueryBuilder;
import org.opensearch.knn.index.query.KNNQueryBuilder;
import org.opensearch.knn.index.query.parser.KNNQueryBuilderParser;
import org.opensearch.plugins.SearchPlugin;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/**
 * JMH benchmarks for the impact of changes around k-NN query parsing.
 *
 * <p>Each run is parameterized by the query vector {@code dimension} and by the query {@code type}:
 * {@code "basic"} serializes only the vector and k, while {@code "filter"} additionally serializes a
 * term-query filter. The JSON form of the query is built in {@link #setup()} and is re-parsed by
 * {@link #fromXContent(Blackhole)} on every benchmark invocation.
 */
@Warmup(iterations = 5, time = 10)
@Measurement(iterations = 3, time = 10)
@Fork(3)
@State(Scope.Benchmark)
public class QueryParsingBenchmarks {
    /** Filter query serialized into the request when {@code type} is {@code "filter"}. */
    private static final TermQueryBuilder TERM_QUERY = QueryBuilders.termQuery("field", "value");
    /** Registry handed to every parser; built once because it does not depend on the benchmark parameters. */
    private static final NamedXContentRegistry NAMED_X_CONTENT_REGISTRY = xContentRegistry();

    /** Number of elements in the generated query vector. */
    @Param({ "128", "1024" })
    private int dimension;
    /** Query flavour: {@code "basic"} or {@code "filter"}. */
    @Param({ "basic", "filter" })
    private String type;

    /** Serialized query that each benchmark invocation parses. */
    private BytesReference bytesReference;

    /**
     * Serializes the query under test to JSON and stores the resulting bytes for the benchmark method.
     *
     * @throws IOException if the XContent builder fails while writing the query
     */
    @Setup
    public void setup() throws IOException {
        XContentBuilder builder = XContentFactory.jsonBuilder();
        builder.startObject();
        // "test" names the inner object; presumably it is read back as the target field name - confirm against the parser.
        builder.startObject("test");
        builder.field(KNNQueryBuilder.VECTOR_FIELD.getPreferredName(), generateVectorWithOnes(dimension));
        builder.field(KNNQueryBuilder.K_FIELD.getPreferredName(), 1);
        if ("filter".equals(type)) {
            builder.field(KNNQueryBuilder.FILTER_FIELD.getPreferredName(), TERM_QUERY);
        }
        builder.endObject();
        builder.endObject();
        bytesReference = BytesReference.bytes(builder);
    }

    /**
     * Parses the pre-serialized query into a query builder and hands the result to the blackhole.
     *
     * @param bh JMH blackhole that consumes the parsed query so the work is not optimized away
     * @throws IOException if the parser cannot be created, read or closed
     */
    @Benchmark
    public void fromXContent(final Blackhole bh) throws IOException {
        // A new parser is created per invocation; close it so its resources are released rather than
        // leaked across the many invocations of a benchmark run.
        try (XContentParser xContentParser = createParser()) {
            bh.consume(KNNQueryBuilderParser.fromXContent(xContentParser));
        }
    }

    /**
     * Creates a parser over the serialized query and advances it to the first token.
     *
     * @return a parser that has already consumed its first token; the caller is responsible for closing it
     * @throws IOException if the parser cannot be created or the first token cannot be read
     */
    private XContentParser createParser() throws IOException {
        XContentParser contentParser = createParser(bytesReference);
        contentParser.nextToken();
        return contentParser;
    }

    /**
     * Builds a vector in which every element is {@code 1.0f}.
     *
     * @param dimensions number of elements in the vector
     * @return a new array of length {@code dimensions} filled with ones
     */
    private float[] generateVectorWithOnes(final int dimensions) {
        float[] vector = new float[dimensions];
        Arrays.fill(vector, (float) 1);
        return vector;
    }

    /**
     * Creates a JSON parser directly over the backing array of the given bytes.
     *
     * @param data serialized JSON; must be a {@link BytesArray}, otherwise the cast below throws
     *             {@link ClassCastException}
     * @return a JSON parser reading from {@code data}
     * @throws IOException if the parser cannot be created
     */
    private XContentParser createParser(final BytesReference data) throws IOException {
        BytesArray array = (BytesArray) data;
        return JsonXContent.jsonXContent.createParser(
            NAMED_X_CONTENT_REGISTRY,
            LoggingDeprecationHandler.INSTANCE,
            array.array(),
            array.offset(),
            array.length()
        );
    }

    /**
     * Builds the named XContent registry used by the parsers: the entries returned by
     * {@link ClusterModule#getNamedXWriteables()} plus an entry that parses term queries.
     *
     * @return a registry containing the cluster entries and the term-query parser
     */
    private static NamedXContentRegistry xContentRegistry() {
        List<NamedXContentRegistry.Entry> list = ClusterModule.getNamedXWriteables();
        SearchPlugin.QuerySpec<?> spec = new SearchPlugin.QuerySpec<>(
            TermQueryBuilder.NAME,
            TermQueryBuilder::new,
            TermQueryBuilder::fromXContent
        );
        // Register the term query under QueryBuilder; presumably required to parse the "filter" variant.
        list.add(new NamedXContentRegistry.Entry(QueryBuilder.class, spec.getName(), (p, c) -> spec.getParser().fromXContent(p)));
        return new NamedXContentRegistry(list);
    }
}
Loading

0 comments on commit e3b1e9d

Please sign in to comment.