From 18c5246f1aa4bc3a08b2bade2cb67ef3b90e649c Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 11 Oct 2023 06:56:54 -0700 Subject: [PATCH] Disallow vectors whose magnitudes will not fit in a float (#100519) While we check for a magnitude to not be `0f`, we don't verify that it actually fits within a `float` value. This commit returns a failure and rejects `float` vectors whose magnitudes don't fit within a 32-bit `float` value. We don't support `float64` (aka `double`) values for vector search and should fail when a user attempts to index a vector that requires storing as `double`. closes: https://github.com/elastic/elasticsearch/issues/100471 --- docs/changelog/100519.yaml | 5 +++ .../vectors/DenseVectorFieldMapper.java | 13 ++++++- .../vectors/DenseVectorFieldMapperTests.java | 34 +++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/100519.yaml diff --git a/docs/changelog/100519.yaml b/docs/changelog/100519.yaml new file mode 100644 index 0000000000000..086c6962b3a95 --- /dev/null +++ b/docs/changelog/100519.yaml @@ -0,0 +1,5 @@ +pr: 100519 +summary: Disallow vectors whose magnitudes will not fit in a float +area: Vector Search +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index deb178ff724bb..ee144b25f4507 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -458,6 +458,15 @@ void checkVectorMagnitude( ) { StringBuilder errorBuilder = null; + if (Float.isNaN(squaredMagnitude) || Float.isInfinite(squaredMagnitude)) { + errorBuilder = new StringBuilder( + "NaN or Infinite magnitude detected, this usually means the vector values are too extreme to fit within a float."
+ ); + } + if (errorBuilder != null) { + throw new IllegalArgumentException(appender.apply(errorBuilder).toString()); + } + if (similarity == VectorSimilarity.DOT_PRODUCT && Math.abs(squaredMagnitude - 1.0f) > 1e-4f) { errorBuilder = new StringBuilder( "The [" + VectorSimilarity.DOT_PRODUCT + "] similarity can only be used with unit-length vectors." @@ -886,7 +895,9 @@ public Query createKnnQuery( } elementType.checkVectorBounds(queryVector); - if (similarity == VectorSimilarity.DOT_PRODUCT || similarity == VectorSimilarity.COSINE) { + if (similarity == VectorSimilarity.DOT_PRODUCT + || similarity == VectorSimilarity.COSINE + || similarity == VectorSimilarity.MAX_INNER_PRODUCT) { float squaredMagnitude = VectorUtil.dotProduct(queryVector, queryVector); elementType.checkVectorMagnitude(similarity, ElementType.errorFloatElementsAppender(queryVector), squaredMagnitude); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 183c0083c7da1..6d562f88a0100 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -413,6 +413,40 @@ public void testCosineWithZeroByteVector() throws Exception { ); } + public void testMaxInnerProductWithValidNorm() throws Exception { + DocumentMapper mapper = createDocumentMapper( + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dims", 3) + .field("index", true) + .field("similarity", VectorSimilarity.MAX_INNER_PRODUCT) + ) + ); + float[] vector = { -12.1f, 2.7f, -4 }; + // Shouldn't throw + mapper.parse(source(b -> b.array("field", vector))); + } + + public void testWithExtremeFloatVector() throws Exception { + for (VectorSimilarity vs : List.of(VectorSimilarity.COSINE, VectorSimilarity.DOT_PRODUCT, VectorSimilarity.MAX_INNER_PRODUCT)) {
+ DocumentMapper mapper = createDocumentMapper( + fieldMapping(b -> b.field("type", "dense_vector").field("dims", 3).field("index", true).field("similarity", vs)) + ); + float[] vector = { 0.07247924f, -4.310546E-11f, -1.7255947E30f }; + DocumentParsingException e = expectThrows( + DocumentParsingException.class, + () -> mapper.parse(source(b -> b.array("field", vector))) + ); + assertNotNull(e.getCause()); + assertThat( + e.getCause().getMessage(), + containsString( + "NaN or Infinite magnitude detected, this usually means the vector values are too extreme to fit within a float." + ) + ); + } + } + public void testInvalidParameters() { MapperParsingException e = expectThrows( MapperParsingException.class,