Skip to content

Commit

Permalink
Disallow vectors whose magnitudes will not fit in a float (elastic#10…
Browse files Browse the repository at this point in the history
…0519)

While we check that a magnitude is not `0f`, we don't verify that it
actually fits within a `float` value.

This commit returns a failure and rejects `float` vectors whose
magnitudes don't fit within a 32-bit `float` value.

We don't support `float64` (aka `double`) values for vector search and
should fail when a user attempts to index a vector that requires storing
as `double`.

closes: elastic#100471
  • Loading branch information
benwtrent authored Oct 11, 2023
1 parent e411b57 commit 18c5246
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 1 deletion.
5 changes: 5 additions & 0 deletions docs/changelog/100519.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 100519
summary: Disallow vectors whose magnitudes will not fit in a float
area: Vector Search
type: bug
issues:
 - 100471
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,15 @@ void checkVectorMagnitude(
) {
StringBuilder errorBuilder = null;

if (Float.isNaN(squaredMagnitude) || Float.isInfinite(squaredMagnitude)) {
errorBuilder = new StringBuilder(
"NaN or Infinite magnitude detected, this usually means the vector values are too extreme to fit within a float."
);
}
if (errorBuilder != null) {
throw new IllegalArgumentException(appender.apply(errorBuilder).toString());
}

if (similarity == VectorSimilarity.DOT_PRODUCT && Math.abs(squaredMagnitude - 1.0f) > 1e-4f) {
errorBuilder = new StringBuilder(
"The [" + VectorSimilarity.DOT_PRODUCT + "] similarity can only be used with unit-length vectors."
Expand Down Expand Up @@ -886,7 +895,9 @@ public Query createKnnQuery(
}
elementType.checkVectorBounds(queryVector);

if (similarity == VectorSimilarity.DOT_PRODUCT || similarity == VectorSimilarity.COSINE) {
if (similarity == VectorSimilarity.DOT_PRODUCT
|| similarity == VectorSimilarity.COSINE
|| similarity == VectorSimilarity.MAX_INNER_PRODUCT) {
float squaredMagnitude = VectorUtil.dotProduct(queryVector, queryVector);
elementType.checkVectorMagnitude(similarity, ElementType.errorFloatElementsAppender(queryVector), squaredMagnitude);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,40 @@ public void testCosineWithZeroByteVector() throws Exception {
);
}

/**
 * A vector with an ordinary, finite magnitude must be accepted by a
 * {@code dense_vector} field configured with max-inner-product similarity.
 */
public void testMaxInnerProductWithValidNorm() throws Exception {
    final float[] finiteVector = new float[] { -12.1f, 2.7f, -4f };

    final DocumentMapper docMapper = createDocumentMapper(
        fieldMapping(
            mapping -> mapping.field("type", "dense_vector")
                .field("dims", 3)
                .field("index", true)
                .field("similarity", VectorSimilarity.MAX_INNER_PRODUCT)
        )
    );

    // The test passes simply by parsing without an exception being raised.
    docMapper.parse(source(doc -> doc.array("field", finiteVector)));
}

/**
 * Indexing a float vector whose magnitude cannot be represented as a 32-bit float
 * must fail with the "NaN or Infinite magnitude" error, whichever of the covered
 * similarities the field uses.
 */
public void testWithExtremeFloatVector() throws Exception {
    // Each similarity is listed exactly once; MAX_INNER_PRODUCT is included so that
    // every similarity touched by the magnitude validation is exercised.
    List<VectorSimilarity> similarities = List.of(
        VectorSimilarity.COSINE,
        VectorSimilarity.DOT_PRODUCT,
        VectorSimilarity.MAX_INNER_PRODUCT
    );
    for (VectorSimilarity vs : similarities) {
        DocumentMapper mapper = createDocumentMapper(
            fieldMapping(b -> b.field("type", "dense_vector").field("dims", 3).field("index", true).field("similarity", vs))
        );
        // Squaring the last component (about -1.7e30) gives roughly 3e60, far beyond Float.MAX_VALUE (about 3.4e38).
        float[] vector = { 0.07247924f, -4.310546E-11f, -1.7255947E30f };
        DocumentParsingException e = expectThrows(
            DocumentParsingException.class,
            () -> mapper.parse(source(b -> b.array("field", vector)))
        );
        // The validation error is expected as the cause of the parsing exception.
        assertNotNull(e.getCause());
        assertThat(
            e.getCause().getMessage(),
            containsString(
                "NaN or Infinite magnitude detected, this usually means the vector values are too extreme to fit within a float."
            )
        );
    }
}

public void testInvalidParameters() {
MapperParsingException e = expectThrows(
MapperParsingException.class,
Expand Down

0 comments on commit 18c5246

Please sign in to comment.