From 3a4af4b3c76dc6fefa052a80adced0a6d8cb91e6 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Fri, 9 Aug 2024 16:35:02 -0400 Subject: [PATCH] [8.15] Fix NullPointerException when doing knn search on empty index without dims (#111756) (#111763) * Fix NullPointerException when doing knn search on empty index without dims (#111756) * Fix NullPointerException when doing knn search on empty index without dims * Update docs/changelog/111756.yaml * Fix typo in yaml test --------- Co-authored-by: Elastic Machine (cherry picked from commit 4e26114764df6a6e93e62701e5f205bcc9e0dad4) # Conflicts: # rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml * Update 40_knn_search.yml * Yaml --- docs/changelog/111756.yaml | 6 + .../test/search.vectors/40_knn_search.yml | 213 ++++++++++-------- .../vectors/DenseVectorFieldMapper.java | 8 +- 3 files changed, 131 insertions(+), 96 deletions(-) create mode 100644 docs/changelog/111756.yaml diff --git a/docs/changelog/111756.yaml b/docs/changelog/111756.yaml new file mode 100644 index 0000000000000..e58345dbe696a --- /dev/null +++ b/docs/changelog/111756.yaml @@ -0,0 +1,6 @@ +pr: 111756 +summary: Fix `NullPointerException` when doing knn search on empty index without dims +area: Vector Search +type: bug +issues: + - 111733 diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml index 7f0c24e217d14..42472088cf76d 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml @@ -29,14 +29,24 @@ setup: m: 16 ef_construction: 200 + - do: + indices.create: + index: test_empty + body: + mappings: + properties: + vector: + type: dense_vector + + - do: index: index: test id: "1" body: name: cow.jpg - vector: [230.0, 300.33, -34.8988, 15.555, -200.0] - another_vector: [130.0, 115.0, -1.02, 15.555, -100.0] + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] + another_vector: [ 130.0, 115.0, -1.02, 15.555, -100.0 ] - do: index: @@ -44,8 +54,8 @@ setup: id: "2" body: name: moose.jpg - vector: [-0.5, 100.0, -13, 14.8, -156.0] - another_vector: [-0.5, 50.0, -1, 1, 120] + vector: [ -0.5, 100.0, -13, 14.8, -156.0 ] + another_vector: [ -0.5, 50.0, -1, 1, 120 ] - do: index: @@ -53,11 +63,11 @@ setup: id: "3" body: name: rabbit.jpg - vector: [0.5, 111.3, -13.0, 14.8, -156.0] - another_vector: [-0.5, 11.0, 0, 12, 111.0] + vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ] + another_vector: [ -0.5, 11.0, 0, 12, 111.0 ] - do: - indices.refresh: {} + indices.refresh: { } --- "kNN search only": @@ -71,15 +81,15 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - - match: {hits.hits.1._id: "3"} - - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } --- "kNN multi-field search only": - requires: @@ -91,14 +101,14 @@ setup: body: fields: [ "name" ] knn: - - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} - - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + - { field: vector, query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ], k: 2, num_candidates: 3 } + - { field: another_vector, query_vector: [ -0.5, 11.0, 0, 12, 111.0 ], k: 2, num_candidates: 3 } - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } --- "kNN search plus query": - requires: @@ -111,21 +121,21 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 query: term: name: cow.jpg - - match: {hits.hits.0._id: "1"} - - match: {hits.hits.0.fields.name.0: "cow.jpg"} + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0.fields.name.0: "cow.jpg" } - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } - - match: {hits.hits.2._id: "3"} - - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2.fields.name.0: "rabbit.jpg" } --- "kNN multi-field search with query": - requires: @@ -137,20 +147,20 @@ setup: body: fields: [ "name" ] knn: - - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} - - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + - { field: vector, query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ], k: 2, num_candidates: 3 } + - { field: another_vector, query_vector: [ -0.5, 11.0, 0, 12, 111.0 ], k: 2, num_candidates: 3 } query: term: name: cow.jpg - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - - match: {hits.hits.1._id: "1"} - - match: {hits.hits.1.fields.name.0: "cow.jpg"} + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1.fields.name.0: "cow.jpg" } - - match: {hits.hits.2._id: "2"} - - match: {hits.hits.2.fields.name.0: "moose.jpg"} + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2.fields.name.0: "moose.jpg" } --- "kNN search with filter": - requires: @@ -163,16 +173,16 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: term: name: "rabbit.jpg" - - match: {hits.total.value: 1} - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - do: search: @@ -181,7 +191,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: @@ -190,7 +200,7 @@ setup: - term: _id: 2 - - match: {hits.total.value: 0} + - match: { hits.total.value: 0 } --- "kNN search with explicit search_type": @@ -206,7 +216,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 @@ -216,7 +226,7 @@ setup: --- "kNN search in _knn_search endpoint": - skip: - features: ["allowed_warnings"] + features: [ "allowed_warnings" ] - do: allowed_warnings: - "The kNN search API has been replaced by the `knn` option in the search API." @@ -226,22 +236,22 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - - match: {hits.hits.1._id: "3"} - - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } --- "kNN search with filter in _knn_search endpoint": - requires: cluster_features: "gte_v8.2.0" reason: 'kNN with filtering added in 8.2' - test_runner_features: ["allowed_warnings"] + test_runner_features: [ "allowed_warnings" ] - do: allowed_warnings: - "The kNN search API has been replaced by the `knn` option in the search API." @@ -251,16 +261,16 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: term: name: "rabbit.jpg" - - match: {hits.total.value: 1} - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - do: allowed_warnings: @@ -271,7 +281,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: @@ -280,7 +290,7 @@ setup: - term: _id: 2 - - match: {hits.total.value: 0} + - match: { hits.total.value: 0 } --- "Test nonexistent field": @@ -316,12 +326,12 @@ setup: k: 3 field: vector similarity: 11 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] - - length: {hits.hits: 1} + - length: { hits.hits: 1 } - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } --- "Vector similarity with filter only": - requires: @@ -337,13 +347,13 @@ setup: k: 3 field: vector similarity: 11 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] - filter: {"term": {"name": "moose.jpg"}} + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + filter: { "term": { "name": "moose.jpg" } } - - length: {hits.hits: 1} + - length: { hits.hits: 1 } - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - do: search: @@ -355,10 +365,10 @@ setup: k: 3 field: vector similarity: 110 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] - filter: {"term": {"name": "cow.jpg"}} + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + filter: { "term": { "name": "cow.jpg" } } - - length: {hits.hits: 0} + - length: { hits.hits: 0 } --- "Knn search with mip": - requires: @@ -390,7 +400,7 @@ setup: id: "1" body: name: cow.jpg - vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] - do: index: @@ -398,7 +408,7 @@ setup: id: "2" body: name: moose.jpg - vector: [-0.5, 100.0, -13, 14.8, -156.0] + vector: [ -0.5, 100.0, -13, 14.8, -156.0 ] - do: index: @@ -406,10 +416,10 @@ setup: id: "3" body: name: rabbit.jpg - vector: [0.5, 111.3, -13.0, 14.8, -156.0] + vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ] - do: - indices.refresh: {} + indices.refresh: { } - do: search: @@ -420,16 +430,16 @@ setup: num_candidates: 3 k: 3 field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] - - length: {hits.hits: 3} - - match: {hits.hits.0._id: "1"} - - close_to: {hits.hits.0._score: {value: 58694.902, error: 0.01}} - - match: {hits.hits.1._id: "3"} - - close_to: {hits.hits.1._score: {value: 34702.79, error: 0.01}} - - match: {hits.hits.2._id: "2"} - - close_to: {hits.hits.2._score: {value: 33686.29, error: 0.01}} + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 58694.902, error: 0.01 } } + - match: { hits.hits.1._id: "3" } + - close_to: { hits.hits.1._score: { value: 34702.79, error: 0.01 } } + - match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 33686.29, error: 0.01 } } - do: search: @@ -440,14 +450,14 @@ setup: num_candidates: 3 k: 3 field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] filter: { "term": { "name": "moose.jpg" } } - - length: {hits.hits: 1} - - match: {hits.hits.0._id: "2"} - - close_to: {hits.hits.0._score: {value: 33686.29, error: 0.01}} + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "2" } + - close_to: { hits.hits.0._score: { value: 33686.29, error: 0.01 } } --- "Knn search with _name": - requires: @@ -462,7 +472,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 3 num_candidates: 3 _name: "my_knn_query" @@ -473,15 +483,34 @@ setup: _name: "my_query" - - match: {hits.hits.0._id: "1"} - - match: {hits.hits.0.fields.name.0: "cow.jpg"} - - match: {hits.hits.0.matched_queries.0: "my_knn_query"} - - match: {hits.hits.0.matched_queries.1: "my_query"} + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0.fields.name.0: "cow.jpg" } + - match: { hits.hits.0.matched_queries.0: "my_knn_query" } + - match: { hits.hits.0.matched_queries.1: "my_query" } + + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } + - match: { hits.hits.1.matched_queries.0: "my_knn_query" } + + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2.fields.name.0: "rabbit.jpg" } + - match: { hits.hits.2.matched_queries.0: "my_knn_query" } + +--- +"kNN search on empty index should return 0 results and not an error": + - requires: + cluster_features: "gte_v8.15.1" + reason: 'Error fixed in 8.15.1' + - do: + search: + index: test_empty + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + k: 2 + num_candidates: 3 - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} - - match: {hits.hits.1.matched_queries.0: "my_knn_query"} + - match: { hits.total.value: 0 } - - match: {hits.hits.2._id: "3"} - - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} - - match: {hits.hits.2.matched_queries.0: "my_knn_query"} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index b9912b3e097a2..1ba80b203d9df 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -934,8 +934,8 @@ int parseDimensionCount(DocumentParserContext context) throws IOException { } @Override - public void checkDimensions(int dvDims, int qvDims) { - if (dvDims != qvDims * Byte.SIZE) { + public void checkDimensions(Integer dvDims, int qvDims) { + if (dvDims != null && dvDims != qvDims * Byte.SIZE) { throw new IllegalArgumentException( "The query vector has a different number of dimensions [" + qvDims * Byte.SIZE @@ -969,8 +969,8 @@ abstract void checkVectorMagnitude( float squaredMagnitude ); - public void checkDimensions(int dvDims, int qvDims) { - if (dvDims != qvDims) { + public void checkDimensions(Integer dvDims, int qvDims) { + if (dvDims != null && dvDims != qvDims) { throw new IllegalArgumentException( "The query vector has a different number of dimensions [" + qvDims + "] than the document vectors [" + dvDims + "]." );