From 7d0566403af3fa7e1898e2eee34ee83ec5cd0866 Mon Sep 17 00:00:00 2001 From: Ryan Bogan Date: Tue, 24 Sep 2024 12:44:09 -0700 Subject: [PATCH] Update API specifications for k-NN 2.17 changes (#588) * Update API specifications for k-NN 2.17 changes Signed-off-by: Ryan Bogan * Add changelog entry Signed-off-by: Ryan Bogan * Refactor k-NN tests, change naming to match theme, and update changelog message Signed-off-by: Ryan Bogan * Add spaceType parameter to train model API Signed-off-by: Ryan Bogan * Update sha references and remove version checks for asynchronous search Signed-off-by: Ryan Bogan * Add version checks back for asynchronous search Signed-off-by: Ryan Bogan * Update changelog style for entry Signed-off-by: Ryan Bogan --------- Signed-off-by: Ryan Bogan --- .github/workflows/test-spec.yml | 5 +- CHANGELOG.md | 1 + spec/namespaces/knn.yaml | 18 ++++- spec/schemas/_common.mapping.yaml | 8 ++ spec/schemas/_common.yaml | 10 +++ tests/default/_core/search/knn/on_disk.yaml | 71 ++++++++++++++++++ .../search/{knn.yaml => knn/search.yaml} | 2 +- tests/default/knn/train_model.yaml | 73 +++++++++++++++++++ 8 files changed, 183 insertions(+), 5 deletions(-) create mode 100644 tests/default/_core/search/knn/on_disk.yaml rename tests/default/_core/search/{knn.yaml => knn/search.yaml} (98%) create mode 100644 tests/default/knn/train_model.yaml diff --git a/.github/workflows/test-spec.yml b/.github/workflows/test-spec.yml index a202207de..8f15414e3 100644 --- a/.github/workflows/test-spec.yml +++ b/.github/workflows/test-spec.yml @@ -37,11 +37,12 @@ jobs: - version: 2.16.0 tests: snapshot - version: 2.17.0 + - version: 2.18.0 hub: opensearchstaging - ref: '@sha256:1273489ebbedcb470ea13563dae4c6dc6b2ed431e87e686ed931ae0733034b25' + ref: '@sha256:4445e195c53992038891519dc3be0d273cdaad1b047943d68921168ed243e7e9' - version: 3.0.0 hub: opensearchstaging - ref: '@sha256:06af2ba4037f8423dc1a4ed3cd29108a1912774e7c659e73f0fac09e1bb2b63d' + ref: '@sha256:cf07c0ffa7d9e8a3e7fdb58041caae3bb81f1123260431b99d0ebf4a52c3d9a3' name: test-opensearch-spec (version=${{ matrix.entry.version }}, hub=${{ matrix.entry.hub || 'opensearchproject' }}, tests=${{ matrix.entry.tests || 'default' }}) runs-on: ubuntu-latest diff --git a/CHANGELOG.md b/CHANGELOG.md index e76107eac..9f7833649 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -165,6 +165,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Fixed tasks namespace schemas ([#520](https://github.com/opensearch-project/opensearch-api-specification/pull/520)) - Fixed `/_plugins/_transform/_preview` ([#568](https://github.com/opensearch-project/opensearch-api-specification/pull/568)) - Fixed create/delete/index operation in `_bulk` ([#582](https://github.com/opensearch-project/opensearch-api-specification/pull/582)) +- Add `mode` and `compression` to k-NN index creation and search, and add `rescore` and `oversample_factor` to k-NN search ([#588](https://github.com/opensearch-project/opensearch-api-specification/pull/588)) ### Security diff --git a/spec/namespaces/knn.yaml b/spec/namespaces/knn.yaml index 49352df55..a33bd02df 100644 --- a/spec/namespaces/knn.yaml +++ b/spec/namespaces/knn.yaml @@ -268,11 +268,16 @@ components: format: int32 description: type: string + mode: + type: string + compression_level: + type: string method: type: string + spaceType: + type: string required: - dimension - - method - training_field - training_index required: true @@ -281,7 +286,16 @@ components: knn.get_model@200: {} knn.search_models@200: {} knn.stats@200: {} - knn.train_model@200: {} + knn.train_model@200: + content: + application/json: + schema: + type: object + properties: + model_id: + type: string + required: + - model_id knn.warmup@200: {} parameters: knn.delete_model::path.model_id: diff --git a/spec/schemas/_common.mapping.yaml b/spec/schemas/_common.mapping.yaml index a26c15e61..2c26487fb 100644 --- a/spec/schemas/_common.mapping.yaml +++ b/spec/schemas/_common.mapping.yaml @@ -1141,6 +1141,14 @@ components: properties: dimension: type: number + space_type: + type: string + data_type: + type: string + mode: + type: string + compression_level: + type: string method: $ref: '#/components/schemas/KnnVectorMethod' required: diff --git a/spec/schemas/_common.yaml b/spec/schemas/_common.yaml index 08f8d68e4..889c8b014 100644 --- a/spec/schemas/_common.yaml +++ b/spec/schemas/_common.yaml @@ -660,6 +660,16 @@ components: boost: description: Boost value to apply to kNN scores type: number + method_parameters: + type: object + x-version-added: '2.16' + additionalProperties: + type: number + rescore: + type: object + x-version-added: '2.17' + additionalProperties: + type: number required: - vector QueryVector: diff --git a/tests/default/_core/search/knn/on_disk.yaml b/tests/default/_core/search/knn/on_disk.yaml new file mode 100644 index 000000000..ac42d42f4 --- /dev/null +++ b/tests/default/_core/search/knn/on_disk.yaml @@ -0,0 +1,71 @@ +$schema: ../../../../../json_schemas/test_story.schema.yaml + +description: Test search endpoint with knn query. +version: '>= 2.17' + +prologues: + - method: PUT + path: /movies + request: + payload: + settings: + index: + knn: true + mappings: + properties: + recommendation_vector: + type: knn_vector + dimension: 8 + space_type: l2 + data_type: float + mode: on_disk + compression_level: 16x + status: [200] + - method: POST + path: /_bulk + request: + content_type: application/x-ndjson + payload: + - {index: {_index: movies, _id: '1'}} + - {recommendation_vector: [1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5], duration: 12.2} + - {index: {_index: movies, _id: '2'}} + - {recommendation_vector: [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5], duration: 7.1} + - {index: {_index: movies, _id: '3'}} + - {recommendation_vector: [3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5], duration: 12.9} + - {index: {_index: movies, _id: '4'}} + - {recommendation_vector: [4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5], duration: 1.2} + - {index: {_index: movies, _id: '5'}} + - {recommendation_vector: [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], duration: 3.7} + - {index: {_index: movies, _id: '6'}} + - {recommendation_vector: [6.5, 6.5, 6.5, 6.5, 6.5, 6.5, 6.5, 6.5], duration: 10.3} + - {index: {_index: movies, _id: '7'}} + - {recommendation_vector: [7.5, 7.5, 7.5, 7.5, 7.5, 7.5, 7.5, 7.5], duration: 5.5} + - {index: {_index: movies, _id: '8'}} + - {recommendation_vector: [8.5, 8.5, 8.5, 8.5, 8.5, 8.5, 8.5, 8.5], duration: 4.4} + - {index: {_index: movies, _id: '9'}} + - {recommendation_vector: [9.5, 9.5, 9.5, 9.5, 9.5, 9.5, 9.5, 9.5], duration: 8.9} + status: [200] +epilogues: + - path: /movies + method: DELETE + status: [200, 404] + +chapters: + - synopsis: Test k-NN disk-based search. + method: POST + path: /{index}/_search + parameters: + index: movies + request: + payload: + query: + knn: + recommendation_vector: + vector: [1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5] + k: 5 + method_parameters: + ef_search: 512 + rescore: + oversample_factor: 10 + response: + status: 200 \ No newline at end of file diff --git a/tests/default/_core/search/knn.yaml b/tests/default/_core/search/knn/search.yaml similarity index 98% rename from tests/default/_core/search/knn.yaml rename to tests/default/_core/search/knn/search.yaml index 6fcfb12e6..ae4daedeb 100644 --- a/tests/default/_core/search/knn.yaml +++ b/tests/default/_core/search/knn/search.yaml @@ -1,4 +1,4 @@ -$schema: ../../../../json_schemas/test_story.schema.yaml +$schema: ../../../../../json_schemas/test_story.schema.yaml description: Test search endpoint with knn query. version: '>= 1.2' diff --git a/tests/default/knn/train_model.yaml b/tests/default/knn/train_model.yaml new file mode 100644 index 000000000..b45ac5c6f --- /dev/null +++ b/tests/default/knn/train_model.yaml @@ -0,0 +1,73 @@ +$schema: ../../../json_schemas/test_story.schema.yaml + +description: Test training k-NN model with disk-based parameters. +version: '>= 2.17' + +prologues: + - method: PUT + path: /movies + request: + payload: + settings: + index: + knn: true + mappings: + properties: + recommendation_vector: + type: knn_vector + dimension: 8 + status: [200] + - method: POST + path: /_bulk + request: + content_type: application/x-ndjson + payload: + - {index: {_index: movies, _id: '1'}} + - {recommendation_vector: [1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5], duration: 12.2} + - {index: {_index: movies, _id: '2'}} + - {recommendation_vector: [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5], duration: 7.1} + - {index: {_index: movies, _id: '3'}} + - {recommendation_vector: [3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5, 3.5], duration: 12.9} + - {index: {_index: movies, _id: '4'}} + - {recommendation_vector: [4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5], duration: 1.2} + - {index: {_index: movies, _id: '5'}} + - {recommendation_vector: [5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5, 5.5], duration: 3.7} + - {index: {_index: movies, _id: '6'}} + - {recommendation_vector: [6.5, 6.5, 6.5, 6.5, 6.5, 6.5, 6.5, 6.5], duration: 10.3} + - {index: {_index: movies, _id: '7'}} + - {recommendation_vector: [7.5, 7.5, 7.5, 7.5, 7.5, 7.5, 7.5, 7.5], duration: 5.5} + - {index: {_index: movies, _id: '8'}} + - {recommendation_vector: [8.5, 8.5, 8.5, 8.5, 8.5, 8.5, 8.5, 8.5], duration: 4.4} + - {index: {_index: movies, _id: '9'}} + - {recommendation_vector: [9.5, 9.5, 9.5, 9.5, 9.5, 9.5, 9.5, 9.5], duration: 8.9} + status: [200] +epilogues: + - path: /movies + method: DELETE + status: [200, 404] + - path: /_plugins/_knn/models/{model_id} + parameters: + model_id: ${train_model.test_model_id} + method: DELETE + status: [200, 404] + +chapters: + - synopsis: Test training a model with disk-based parameters. + id: train_model + method: POST + path: /_plugins/_knn/models/_train + request: + payload: + training_index: movies + training_field: recommendation_vector + dimension: 8 + max_training_vector_count: 1200 + search_size: 100 + description: Test model + mode: on_disk + compression_level: 32x + spaceType: l2 + response: + status: 200 + output: + test_model_id: payload.model_id \ No newline at end of file