Skip to content

Commit

Permalink
Merge main into branch ignore_unmapped
Browse files Browse the repository at this point in the history
Signed-off-by: Ryan Bogan <[email protected]>
  • Loading branch information
ryanbogan committed Sep 11, 2023
2 parents f97681d + 3173b8a commit 3445245
Show file tree
Hide file tree
Showing 18 changed files with 758 additions and 146 deletions.
78 changes: 64 additions & 14 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,86 @@ on:
- "feature/**"

jobs:
Build-k-NN:
Get-CI-Image-Tag:
runs-on: ubuntu-latest
outputs:
ci-image-version-linux: ${{ steps.step-ci-image-version-linux.outputs.ci-image-version-linux }}
steps:
- name: Install crane
uses: iarekylew00t/crane-installer@v1
with:
crane-release: v0.15.2
- name: Checkout opensearch-build repository
uses: actions/checkout@v2
with:
repository: 'opensearch-project/opensearch-build'
ref: 'main'
path: 'opensearch-build'
- name: Get ci image version from opensearch-build repository scripts
id: step-ci-image-version-linux
run: |
crane version
CI_IMAGE_VERSION=`opensearch-build/docker/ci/get-ci-images.sh -p rockylinux8 -u opensearch -t build | head -1`
echo $CI_IMAGE_VERSION
echo "ci-image-version-linux=$CI_IMAGE_VERSION" >> $GITHUB_OUTPUT
Build-k-NN-Linux:
strategy:
matrix:
java: [11, 17]
os: [ubuntu-latest, macos-latest]

name: Build and Test k-NN Plugin
runs-on: ${{ matrix.os }}
name: Build and Test k-NN Plugin on Linux
runs-on: ubuntu-latest
needs: Get-CI-Image-Tag
container:
# using the same image which is used by opensearch-build team to build the OpenSearch Distribution
# this image tag is subject to change as more dependencies and updates will arrive over time
image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
# need to switch to root so that github actions can install runner binary on container without permission issues.
options: --user root

steps:
- name: Checkout k-NN
uses: actions/checkout@v1
with:
submodules: true

- name: Setup Java ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}

- name: Install dependencies on ubuntu
if: startsWith(matrix.os,'ubuntu')
- name: Run build
# switching the user, as OpenSearch cluster can only be started as root/Administrator on linux-deb/linux-rpm/windows-zip.
run: |
sudo apt-get install libopenblas-dev gfortran -y
chown -R opensearch.opensearch `pwd`
su opensearch -c "whoami && java -version && ./gradlew build"
- name: Upload Coverage Report
uses: codecov/codecov-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}

Build-k-NN-MacOS:
strategy:
matrix:
java: [ 11, 17 ]

name: Build and Test k-NN Plugin on MacOS
needs: Get-CI-Image-Tag
runs-on: macos-latest

steps:
- name: Checkout k-NN
uses: actions/checkout@v1

- name: Setup Java ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}

- name: Install dependencies on macos
if: startsWith(matrix.os, 'macos')
run: |
brew reinstall gcc
export FC=/usr/local/Cellar/gcc/12.2.0/bin/gfortran
Expand All @@ -45,18 +100,13 @@ jobs:
run: |
./gradlew build
- name: Upload Coverage Report
if: startsWith(matrix.os,'ubuntu')
uses: codecov/codecov-action@v1
with:
token: ${{ secrets.CODECOV_TOKEN }}

Build-k-NN-Windows:
strategy:
matrix:
java: [ 11, 17 ]

name: Build and Test k-NN Plugin on Windows
needs: Get-CI-Image-Tag
runs-on: windows-latest

steps:
Expand Down
10 changes: 2 additions & 8 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Maintenance
### Refactoring

## [Unreleased 2.x](https://github.com/opensearch-project/k-NN/compare/2.9...2.x)
## [Unreleased 2.x](https://github.com/opensearch-project/k-NN/compare/2.10...2.x)
### Features
* Add Clear Cache API [#740](https://github.com/opensearch-project/k-NN/pull/740)
### Enhancements
* Enabled the IVF algorithm to work with Filters of K-NN Query. [#1013](https://github.com/opensearch-project/k-NN/pull/1013)
* Improved the logic to switch to exact search for restrictive filters search for better recall. [#1059](https://github.com/opensearch-project/k-NN/pull/1059)
* Added max distance computation logic to enhance the switch to exact search in filtered Nearest Neighbor Search. [#1066](https://github.com/opensearch-project/k-NN/pull/1066)
* Added support for ignore_unmapped in KNN queries. [#1071](https://github.com/opensearch-project/k-NN/pull/1071)
### Bug Fixes
### Bug Fixes
### Infrastructure
### Documentation
### Maintenance
* Update Guava Version to 32.0.1 [#1019](https://github.com/opensearch-project/k-NN/pull/1019)
### Refactoring
* Fix TransportAddress Refactoring Changes in Core [#1020](https://github.com/opensearch-project/k-NN/pull/1020)
17 changes: 17 additions & 0 deletions benchmarks/perf-tool/release-configs/faiss-hnswpq/index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"settings": {
"index": {
"knn": true,
"number_of_shards": 24,
"number_of_replicas": 1
}
},
"mappings": {
"properties": {
"target_field": {
"type": "knn_vector",
"model_id": "test-model"
}
}
}
}
15 changes: 15 additions & 0 deletions benchmarks/perf-tool/release-configs/faiss-hnswpq/method-spec.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"name":"hnsw",
"engine":"faiss",
"space_type": "l2",
"parameters":{
"ef_construction": 256,
"m": 16,
"encoder": {
"name": "pq",
"parameters": {
"m": 16
}
}
}
}
53 changes: 53 additions & 0 deletions benchmarks/perf-tool/release-configs/faiss-hnswpq/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
---
# Perf-tool test configuration for the faiss HNSW + PQ release benchmark.
#
# Placeholders to fill in before running:
#   [ENDPOINT] - replace the whole token, brackets included. Left as-is, YAML
#                reads it as a one-element list, not a string.
#   [PATH]     - presumably the directory holding this file's sibling JSON
#                specs (train-index-spec.json, method-spec.json, index.json);
#                confirm against the perf-tool README.
endpoint: [ENDPOINT]
test_name: "index-workflow"
test_id: "index workflow"
num_runs: 10
show_runs: false

# Builds and fills the training index that the train_model step reads from.
# NOTE(review): presumably executed before `steps`; confirm in the perf-tool.
setup:
  - name: delete_index
    index_name: train_index
  - name: create_index
    index_name: train_index
    index_spec: /home/ec2-user/[PATH]/train-index-spec.json
  - name: ingest
    index_name: train_index
    field_name: train_field
    bulk_size: 500
    dataset_format: hdf5
    dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5
    # Same count as max_training_vector_count in the train_model step below.
    doc_count: 50000
  - name: refresh_index
    index_name: train_index

steps:
  # Remove any model/index left over under the same names.
  - name: delete_model
    model_id: test-model
  - name: delete_index
    index_name: target_index
  # Train "test-model" from train_index/train_field using method-spec.json.
  - name: train_model
    model_id: test-model
    train_index: train_index
    train_field: train_field
    dimension: 128
    method_spec: /home/ec2-user/[PATH]/method-spec.json
    max_training_vector_count: 50000
  - name: create_index
    index_name: target_index
    index_spec: /home/ec2-user/[PATH]/index.json
  # No doc_count on this ingest, unlike the training ingest in `setup`.
  - name: ingest
    index_name: target_index
    field_name: target_field
    bulk_size: 500
    dataset_format: hdf5
    dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5
  - name: refresh_index
    index_name: target_index
  # Query vectors and ground-truth neighbors are read from the same hdf5 file.
  - name: query
    k: 100
    r: 1
    calculate_recall: true
    index_name: target_index
    field_name: target_field
    dataset_format: hdf5
    dataset_path: /home/ec2-user/data/sift-128-euclidean.hdf5
    neighbors_format: hdf5
    neighbors_path: /home/ec2-user/data/sift-128-euclidean.hdf5
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"settings": {
"index": {
"number_of_shards": 24,
"number_of_replicas": 0
}
},
"mappings": {
"properties": {
"train_field": {
"type": "knn_vector",
"dimension": 128
}
}
}
}
16 changes: 16 additions & 0 deletions release-notes/opensearch-knn.release-notes-2.10.0.0.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
## Version 2.10.0.0 Release Notes

Compatible with OpenSearch 2.10.0

### Features
* Add Clear Cache API ([#740](https://github.com/opensearch-project/k-NN/pull/740))
### Enhancements
* Enabled the IVF algorithm to work with Filters of K-NN Query. ([#1013](https://github.com/opensearch-project/k-NN/pull/1013))
* Improved the logic for switching to exact search when filters are restrictive, for better recall. ([#1059](https://github.com/opensearch-project/k-NN/pull/1059))
* Added max distance computation logic to enhance the switch to exact search in filtered Nearest Neighbor Search. ([#1066](https://github.com/opensearch-project/k-NN/pull/1066))
### Bug Fixes
* Update Faiss parameter construction to allow HNSW+PQ to work ([#1074](https://github.com/opensearch-project/k-NN/pull/1074))
### Maintenance
* Update Guava Version to 32.0.1 ([#1019](https://github.com/opensearch-project/k-NN/pull/1019))
### Refactoring
* Fix TransportAddress Refactoring Changes in Core ([#1020](https://github.com/opensearch-project/k-NN/pull/1020))
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,7 @@ public static Query create(CreateQueryRequest createQueryRequest) {

if (KNNEngine.getEnginesThatCreateCustomSegmentFiles().contains(createQueryRequest.getKnnEngine())) {
if (filterQuery != null && KNNEngine.getEnginesThatSupportsFilters().contains(createQueryRequest.getKnnEngine())) {
log.debug(
String.format(
"Creating custom k-NN query with filters for index: %s \"\", field: %s \"\", " + "k: %d",
indexName,
fieldName,
k
)
);
log.debug("Creating custom k-NN query with filters for index: {}, field: {} , k: {}", indexName, fieldName, k);
return new KNNQuery(fieldName, vector, k, indexName, filterQuery);
}
log.debug(String.format("Creating custom k-NN query for index: %s \"\", field: %s \"\", k: %d", indexName, fieldName, k));
Expand Down
9 changes: 8 additions & 1 deletion src/main/java/org/opensearch/knn/index/query/KNNWeight.java
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
return null;
}
if (canDoExactSearchAfterANNSearch(filterIdsArray.length, annResults.size())) {
log.debug(
"Doing ExactSearch after doing ANNSearch as the number of documents returned are less than "
+ "K, even when we have more than K filtered Ids. K: {}, ANNResults: {}, filteredIdCount: {}",
knnQuery.getK(),
annResults.size(),
filterIdsArray.length
);
annResults = doExactSearch(context, filterIdsArray);
}
docIdsToScoreMap.putAll(annResults);
Expand Down Expand Up @@ -390,7 +397,7 @@ private boolean canDoExactSearch(final int filterIdsCount, final int searchableD
return filterThresholdValue >= filterIdsCount;
}
// if no setting is set, then use the default max distance computation value to see if we can do exact search.
return KNNConstants.MAX_DISTANCE_COMPUTATIONS <= filterIdsCount * knnQuery.getQueryVector().length;
return KNNConstants.MAX_DISTANCE_COMPUTATIONS >= filterIdsCount * knnQuery.getQueryVector().length;
}

/**
Expand Down
Loading

0 comments on commit 3445245

Please sign in to comment.