diff --git a/server/src/main/java/org/opensearch/index/mapper/DenseVectorFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DenseVectorFieldMapper.java index 8c9f9ec4b50e9..7d41382f0191e 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DenseVectorFieldMapper.java @@ -9,30 +9,32 @@ import org.apache.lucene.document.KnnVectorField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.opensearch.common.Explicit; -import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.Nullable; +import org.opensearch.common.unit.Fuzziness; import org.opensearch.common.xcontent.XContentBuilder; import org.opensearch.common.xcontent.XContentParser; import org.opensearch.common.xcontent.support.XContentMapValues; import org.opensearch.index.query.QueryShardContext; -import org.opensearch.index.query.QueryShardException; import org.opensearch.search.lookup.SearchLookup; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Objects; /** * Field Mapper for Dense vector type. Extends ParametrizedFieldMapper in order to easily configure mapping parameters. * * @opensearch.internal */ -public final class DenseVectorFieldMapper extends ParametrizedFieldMapper { +public final class DenseVectorFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "dense_vector"; @@ -49,12 +51,29 @@ private static DenseVectorFieldMapper toType(FieldMapper in) { * Builder for DenseVectorFieldMapper. This class defines the set of parameters that can be applied to the knn_vector * field type */ - public static class Builder extends ParametrizedFieldMapper.Builder { + public static class Builder extends FieldMapper.Builder { + private CopyTo copyTo = CopyTo.empty(); + private Integer dimension = 1; + private KnnContext knnContext = null; - private final Parameter hasDocValues = Parameter.docValuesParam(m -> toType(m).hasDocValues, false); + public Builder(String name) { + super(name, Defaults.FIELD_TYPE); + builder = this; + } + + @Override + public DenseVectorFieldMapper build(BuilderContext context) { + final DenseVectorFieldType mappedFieldType = new DenseVectorFieldType(buildFullName(context), dimension, knnContext); + return new DenseVectorFieldMapper( + buildFullName(context), + fieldType, + mappedFieldType, + multiFieldsBuilder.build(this, context), + copyTo + ); + } - protected final Parameter dimension = new Parameter<>(Names.DIMENSION.getValue(), false, () -> 1, (n, c, o) -> { - int value = XContentMapValues.nodeIntegerValue(o); + public Builder dimension(int value) { if (value > MAX_DIMENSION) { throw new IllegalArgumentException( String.format(Locale.ROOT, "[dimension] value %d cannot be greater than %d for vector [%s]", value, MAX_DIMENSION, name) @@ -65,41 +84,13 @@ public static class Builder extends ParametrizedFieldMapper.Builder { String.format(Locale.ROOT, "[dimension] value %d must be greater than 0 for vector [%s]", value, name) ); } - return value; - }, m -> toType(m).dimension).setSerializer((b, n, v) -> b.field(n, v.intValue()), v -> Integer.toString(v.intValue())); - - private final Parameter knnContext = new Parameter<>( - Names.KNN.getValue(), - false, - () -> null, - (n, c, o) -> KnnContext.parse(o), - m -> toType(m).knnContext - ).setSerializer(((b, n, v) -> { - if (v == null) { - return; - } - b.startObject(n); - v.toXContent(b, ToXContent.EMPTY_PARAMS); - b.endObject(); - }), m -> m.getKnnAlgorithmContext().getMethod().name()); - - public Builder(String name) { - super(name); + this.dimension = value; + return this; } - @Override - protected List> getParameters() { - return List.of(dimension, knnContext, hasDocValues); - } - - @Override - public DenseVectorFieldMapper build(BuilderContext context) { - return new DenseVectorFieldMapper( - buildFullName(context), - new DenseVectorFieldType(buildFullName(context), dimension.get(), knnContext.get()), - multiFieldsBuilder.build(this, context), - copyTo.build() - ); + public Builder knn(KnnContext value) { + this.knnContext = value; + return this; } } @@ -113,12 +104,30 @@ public static class TypeParser implements Mapper.TypeParser { @Override public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { Builder builder = new DenseVectorFieldMapper.Builder(name); - Object dimensionField = node.get(Names.DIMENSION.getValue()); - String dimension = XContentMapValues.nodeStringValue(dimensionField, null); - if (dimension == null) { - throw new MapperParsingException(String.format(Locale.ROOT, "[dimension] property must be specified for field [%s]", name)); + TypeParsers.parseField(builder, name, node, parserContext); + + for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { + Map.Entry entry = iterator.next(); + String fieldName = entry.getKey(); + Object fieldNode = entry.getValue(); + switch (fieldName) { + case "dimension": + if (fieldNode == null) { + throw new MapperParsingException( + String.format(Locale.ROOT, "[dimension] property must be specified for field [%s]", name) + ); + } + builder.dimension(XContentMapValues.nodeIntegerValue(fieldNode, 1)); + iterator.remove(); + break; + case "knn": + builder.knn(KnnContext.parse(fieldNode)); + iterator.remove(); + break; + default: + break; + } } - builder.parse(name, parserContext, node); return builder; } } @@ -145,7 +154,7 @@ public DenseVectorFieldType(String name, Map meta, int dimension @Override public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { - throw new UnsupportedOperationException("Dense_vector does not support fields search"); + throw new UnsupportedOperationException("[fields search] are not supported on [" + CONTENT_TYPE + "] fields."); } @Override @@ -154,16 +163,47 @@ public String typeName() { } @Override - public Query existsQuery(QueryShardContext context) { - return new FieldExistsQuery(name()); + public Query termQuery(Object value, QueryShardContext context) { + throw new UnsupportedOperationException("[term] queries are not supported on [" + CONTENT_TYPE + "] fields."); } @Override - public Query termQuery(Object value, QueryShardContext context) { - throw new QueryShardException( - context, - "Dense_vector does not support exact searching, use KNN queries instead [" + name() + "]" - ); + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + QueryShardContext context + ) { + throw new UnsupportedOperationException("[fuzzy] queries are not supported on [" + CONTENT_TYPE + "] fields."); + } + + @Override + public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) { + throw new UnsupportedOperationException("[prefix] queries are not supported on [" + CONTENT_TYPE + "] fields."); + } + + @Override + public Query wildcardQuery( + String value, + @Nullable MultiTermQuery.RewriteMethod method, + boolean caseInsensitive, + QueryShardContext context + ) { + throw new UnsupportedOperationException("[wildcard] queries are not supported on [" + CONTENT_TYPE + "] fields."); + } + + @Override + public Query regexpQuery( + String value, + int syntaxFlags, + int matchFlags, + int maxDeterminizedStates, + MultiTermQuery.RewriteMethod method, + QueryShardContext context + ) { + throw new UnsupportedOperationException("[regexp] queries are not supported on [" + CONTENT_TYPE + "] fields."); } public int getDimension() { @@ -182,8 +222,14 @@ public KnnContext getKnnContext() { protected boolean hasDocValues; protected String modelId; - public DenseVectorFieldMapper(String simpleName, DenseVectorFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo) { - super(simpleName, mappedFieldType, multiFields, copyTo); + public DenseVectorFieldMapper( + String simpleName, + FieldType fieldType, + DenseVectorFieldType mappedFieldType, + MultiFields multiFields, + CopyTo copyTo + ) { + super(simpleName, fieldType, mappedFieldType, multiFields, copyTo); dimension = mappedFieldType.getDimension(); fieldType = new FieldType(DenseVectorFieldMapper.Defaults.FIELD_TYPE); isKnnEnabled = mappedFieldType.getKnnContext() != null; @@ -207,6 +253,57 @@ protected void parseCreateField(ParseContext context) throws IOException { parseCreateField(context, fieldType().getDimension()); } + @Override + protected void mergeOptions(FieldMapper other, List conflicts) { + DenseVectorFieldMapper denseVectorMergeWith = (DenseVectorFieldMapper) other; + if (!Objects.equals(dimension, denseVectorMergeWith.dimension)) { + conflicts.add("mapper [" + name() + "] has different [dimension]"); + } + + if (isOnlyOneObjectNull(knnContext, denseVectorMergeWith.knnContext) + || (isBothObjectsNotNull(knnContext, denseVectorMergeWith.knnContext) + && !Objects.equals(knnContext.getMetric(), denseVectorMergeWith.knnContext.getMetric()))) { + conflicts.add("mapper [" + name() + "] has different [metric]"); + } + + if (isBothObjectsNotNull(knnContext, denseVectorMergeWith.knnContext)) { + + if (!Objects.equals(knnContext.getMetric(), denseVectorMergeWith.knnContext.getMetric())) { + conflicts.add("mapper [" + name() + "] has different [metric]"); + } + + if (isBothObjectsNotNull(knnContext.getKnnAlgorithmContext(), denseVectorMergeWith.knnContext.getKnnAlgorithmContext())) { + KnnAlgorithmContext knnAlgorithmContext = knnContext.getKnnAlgorithmContext(); + KnnAlgorithmContext mergeWithKnnAlgorithmContext = denseVectorMergeWith.knnContext.getKnnAlgorithmContext(); + + if (isOnlyOneObjectNull(knnAlgorithmContext, mergeWithKnnAlgorithmContext) + || (isBothObjectsNotNull(knnAlgorithmContext, mergeWithKnnAlgorithmContext) + && !Objects.equals(knnAlgorithmContext.getMethod(), mergeWithKnnAlgorithmContext.getMethod()))) { + conflicts.add("mapper [" + name() + "] has different [method]"); + } + + if (isBothObjectsNotNull(knnAlgorithmContext, mergeWithKnnAlgorithmContext)) { + Map knnAlgoParams = knnAlgorithmContext.getParameters(); + Map mergeWithKnnAlgoParams = mergeWithKnnAlgorithmContext.getParameters(); + + if (isOnlyOneObjectNull(knnAlgoParams, mergeWithKnnAlgoParams) + || (isBothObjectsNotNull(knnAlgoParams, mergeWithKnnAlgoParams) + && !Objects.equals(knnAlgoParams, mergeWithKnnAlgoParams))) { + conflicts.add("mapper [" + name() + "] has different [knn algorithm parameters]"); + } + } + } + } + } + + private boolean isOnlyOneObjectNull(Object object1, Object object2) { + return object1 == null && object2 != null || object2 == null && object1 != null; + } + + private boolean isBothObjectsNotNull(Object object1, Object object2) { + return object1 != null && object2 != null; + } + protected void parseCreateField(ParseContext context, int dimension) throws IOException { context.path().add(simpleName()); @@ -276,12 +373,7 @@ protected boolean docValuesByDefault() { } @Override - public ParametrizedFieldMapper.Builder getMergeBuilder() { - return new DenseVectorFieldMapper.Builder(simpleName()).init(this); - } - - @Override - public final boolean parsesArrayValue() { + public boolean parsesArrayValue() { return true; } @@ -293,6 +385,13 @@ public DenseVectorFieldType fieldType() { @Override protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { super.doXContentBody(builder, includeDefaults, params); + + builder.field("dimension", dimension); + if (knnContext != null) { + builder.startObject("knn"); + knnContext.toXContent(builder, params); + builder.endObject(); + } } /** @@ -325,6 +424,7 @@ static class Defaults { static { FIELD_TYPE.setTokenized(false); + FIELD_TYPE.setOmitNorms(true); FIELD_TYPE.setIndexOptions(IndexOptions.NONE); FIELD_TYPE.setDocValuesType(DocValuesType.NONE); FIELD_TYPE.freeze(); diff --git a/server/src/test/java/org/opensearch/index/mapper/DenseVectorFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DenseVectorFieldMapperTests.java new file mode 100644 index 0000000000000..30061af3e71eb --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/DenseVectorFieldMapperTests.java @@ -0,0 +1,380 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.document.KnnVectorField; +import org.apache.lucene.index.IndexableField; +import org.opensearch.common.Strings; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.index.mapper.DenseVectorFieldMapper.DenseVectorFieldType; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.IntStream; + +import static org.hamcrest.Matchers.containsString; +import static org.opensearch.index.mapper.KnnAlgorithmContext.Method.HNSW; +import static org.opensearch.index.mapper.KnnAlgorithmContextFactory.HNSW_PARAMETER_BEAM_WIDTH; +import static org.opensearch.index.mapper.KnnAlgorithmContextFactory.HNSW_PARAMETER_MAX_CONNECTIONS; + +public class DenseVectorFieldMapperTests extends FieldMapperTestCase2 { + + private static final float[] VECTOR = { 2.0f, 4.5f }; + + public void testValueDisplay() { + KnnAlgorithmContext knnMethodContext = new KnnAlgorithmContext( + HNSW, + Map.of(HNSW_PARAMETER_MAX_CONNECTIONS, 16, HNSW_PARAMETER_BEAM_WIDTH, 100) + ); + KnnContext knnContext = new KnnContext(Metric.L2, knnMethodContext); + MappedFieldType ft = new DenseVectorFieldType("field", 1, knnContext); + Object actualFloatArray = ft.valueForDisplay(VECTOR); + assertTrue(actualFloatArray instanceof float[]); + assertArrayEquals(VECTOR, (float[]) actualFloatArray, 0.0f); + } + + public void testSerializationWithoutKnn() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "dense_vector").field("dimension", 2))); + Mapper fieldMapper = mapper.mappers().getMapper("field"); + assertTrue(fieldMapper instanceof DenseVectorFieldMapper); + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) fieldMapper; + assertEquals(2, denseVectorFieldMapper.fieldType().getDimension()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", VECTOR))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertTrue(fields[0] instanceof KnnVectorField); + float[] actualVector = ((KnnVectorField) fields[0]).vectorValue(); + assertArrayEquals(VECTOR, actualVector, 0.0f); + } + + public void testSerializationWithKnn() throws IOException { + DocumentMapper mapper = createDocumentMapper( + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field( + "knn", + Map.of( + "metric", + "L2", + "algorithm", + Map.of("name", "HNSW", "parameters", Map.of("max_connections", 16, "beam_width", 100)) + ) + ) + ) + ); + + Mapper fieldMapper = mapper.mappers().getMapper("field"); + assertTrue(fieldMapper instanceof DenseVectorFieldMapper); + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) fieldMapper; + assertEquals(2, denseVectorFieldMapper.fieldType().getDimension()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", VECTOR))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertTrue(fields[0] instanceof KnnVectorField); + float[] actualVector = ((KnnVectorField) fields[0]).vectorValue(); + assertArrayEquals(VECTOR, actualVector, 0.0f); + } + + @Override + protected DenseVectorFieldMapper.Builder newBuilder() { + return new DenseVectorFieldMapper.Builder("dense_vector"); + } + + public void testDeprecatedBoost() throws IOException { + createMapperService(fieldMapping(b -> { + minimalMapping(b); + b.field("boost", 2.0); + })); + String type = typeName(); + String[] warnings = new String[] { + "Parameter [boost] on field [field] is deprecated and will be removed in 8.0", + "Parameter [boost] has no effect on type [" + type + "] and will be removed in future" }; + allowedWarnings(warnings); + } + + public void testIfMinimalSerializesToItself() throws IOException { + XContentBuilder orig = JsonXContent.contentBuilder().startObject(); + createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, ToXContent.EMPTY_PARAMS); + orig.endObject(); + XContentBuilder parsedFromOrig = JsonXContent.contentBuilder().startObject(); + createMapperService(orig).documentMapper().mapping().toXContent(parsedFromOrig, ToXContent.EMPTY_PARAMS); + parsedFromOrig.endObject(); + assertEquals(Strings.toString(orig), Strings.toString(parsedFromOrig)); + } + + public void testForEmptyName() { + MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(mapping(b -> { + b.startObject(""); + minimalMapping(b); + b.endObject(); + }))); + assertThat(e.getMessage(), containsString("name cannot be empty string")); + } + + protected void writeFieldValue(XContentBuilder b) throws IOException { + b.value(new float[] { 2.5f }); + } + + protected void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", "dense_vector"); + b.field("dimension", 1); + } + + protected void registerParameters(MapperTestCase.ParameterChecker checker) throws IOException {} + + @Override + protected Set unsupportedProperties() { + return org.opensearch.common.collect.Set.of("analyzer", "similarity", "doc_values", "store", "index"); + } + + protected String typeName() throws IOException { + MapperService ms = createMapperService(fieldMapping(this::minimalMapping)); + return ms.fieldType("field").typeName(); + } + + @Override + protected boolean supportsMeta() { + return false; + } + + public void testCosineMetric() throws IOException { + DocumentMapper mapper = createDocumentMapper( + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field( + "knn", + Map.of( + "metric", + "cosine", + "algorithm", + Map.of("name", "HNSW", "parameters", Map.of("max_connections", 16, "beam_width", 100)) + ) + ) + ) + ); + + Mapper fieldMapper = mapper.mappers().getMapper("field"); + assertTrue(fieldMapper instanceof DenseVectorFieldMapper); + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) fieldMapper; + assertEquals(2, denseVectorFieldMapper.fieldType().getDimension()); + } + + public void testDotProductMetric() throws IOException { + DocumentMapper mapper = createDocumentMapper( + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field( + "knn", + Map.of( + "metric", + "dot_product", + "algorithm", + Map.of("name", "HNSW", "parameters", Map.of("max_connections", 16, "beam_width", 100)) + ) + ) + ) + ); + + Mapper fieldMapper = mapper.mappers().getMapper("field"); + assertTrue(fieldMapper instanceof DenseVectorFieldMapper); + DenseVectorFieldMapper denseVectorFieldMapper = (DenseVectorFieldMapper) fieldMapper; + assertEquals(2, denseVectorFieldMapper.fieldType().getDimension()); + } + + public void testHNSWAlgorithmParametersInvalidInput() throws Exception { + XContentBuilder mappingInvalidMaxConnections = fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field( + "knn", + Map.of( + "metric", + "dot_product", + "algorithm", + Map.of("name", "HNSW", "parameters", Map.of("max_connections", 256, "beam_width", 50)) + ) + ) + ); + final MapperParsingException mapperExceptionInvalidMaxConnections = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(mappingInvalidMaxConnections) + ); + assertEquals("max_connections value cannot be greater than 16", mapperExceptionInvalidMaxConnections.getRootCause().getMessage()); + + XContentBuilder mappingInvalidBeamWidth = fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field( + "knn", + Map.of( + "metric", + "dot_product", + "algorithm", + Map.of("name", "HNSW", "parameters", Map.of("max_connections", 6, "beam_width", 1024)) + ) + ) + ); + final MapperParsingException mapperExceptionInvalidmBeamWidth = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(mappingInvalidBeamWidth) + ); + assertEquals("beam_width value cannot be greater than 512", mapperExceptionInvalidmBeamWidth.getRootCause().getMessage()); + + XContentBuilder mappingUnsupportedParam = fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field( + "knn", + Map.of( + "metric", + "dot_product", + "algorithm", + Map.of("name", "HNSW", "parameters", Map.of("max_connections", 6, "beam_width", 256, "some_param", 23)) + ) + ) + ); + final MapperParsingException mapperExceptionUnsupportedParam = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(mappingUnsupportedParam) + ); + assertEquals("Algorithm parameter [some_param] is not supported", mapperExceptionUnsupportedParam.getRootCause().getMessage()); + } + + public void testInvalidMetric() throws Exception { + XContentBuilder mappingInvalidMetric = fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field("knn", Map.of("metric", "LAMBDA", "algorithm", Map.of("name", "HNSW"))) + ); + final MapperParsingException mapperExceptionInvalidMetric = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(mappingInvalidMetric) + ); + assertEquals("[metric] value [LAMBDA] is invalid", mapperExceptionInvalidMetric.getRootCause().getMessage()); + } + + public void testInvalidAlgorithm() throws Exception { + XContentBuilder mappingInvalidAlgorithm = fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field("knn", Map.of("metric", "dot_product", "algorithm", Map.of("name", "MY_ALGORITHM"))) + ); + final MapperParsingException mapperExceptionInvalidAlgorithm = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(mappingInvalidAlgorithm) + ); + assertEquals( + "[algorithm name] value [MY_ALGORITHM] is invalid or not supported", + mapperExceptionInvalidAlgorithm.getRootCause().getMessage() + ); + } + + public void testInvalidParams() throws Exception { + XContentBuilder mapping = fieldMapping( + b -> b.field("type", "dense_vector").field("dimension", 2).field("my_field", "some_value").field("knn", Map.of()) + ); + final MapperParsingException mapperParsingException = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(mapping) + ); + assertEquals( + "Mapping definition for [field] has unsupported parameters: [my_field : some_value]", + mapperParsingException.getRootCause().getMessage() + ); + } + + public void testExceedMaxNumberOfAlgorithmParams() throws Exception { + Map algorithmParams = new HashMap<>(); + IntStream.range(0, 100).forEach(number -> algorithmParams.put("param" + number, randomInt(Integer.MAX_VALUE))); + XContentBuilder mapping = fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field("knn", Map.of("metric", "dot_product", "algorithm", Map.of("name", "HNSW", "parameters", algorithmParams))) + ); + final MapperParsingException mapperParsingException = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(mapping) + ); + assertEquals( + "Invalid number of parameters for [algorithm], max allowed is [50] but given [100]", + mapperParsingException.getRootCause().getMessage() + ); + } + + public void testInvalidVectorNumberFormat() throws Exception { + DocumentMapper mapper = createDocumentMapper( + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field( + "knn", + Map.of( + "metric", + "L2", + "algorithm", + Map.of("name", "HNSW", "parameters", Map.of("max_connections", 16, "beam_width", 100)) + ) + ) + ) + ); + final MapperParsingException mapperExceptionStringAsVectorValue = expectThrows( + MapperParsingException.class, + () -> mapper.parse(source(b -> b.field("field", "some malicious script content"))) + ); + assertEquals( + mapperExceptionStringAsVectorValue.getMessage(), + "failed to parse field [field] of type [dense_vector] in document with id '1'. Preview of field's value: 'some malicious script content'" + ); + + final MapperParsingException mapperExceptionInfinityVectorValue = expectThrows( + MapperParsingException.class, + () -> mapper.parse(source(b -> b.field("field", new Float[] { Float.POSITIVE_INFINITY }))) + ); + assertEquals( + mapperExceptionInfinityVectorValue.getMessage(), + "failed to parse field [field] of type [dense_vector] in document with id '1'. Preview of field's value: 'Infinity'" + ); + + final MapperParsingException mapperExceptionNullVectorValue = expectThrows( + MapperParsingException.class, + () -> mapper.parse(source(b -> b.field("field", new Float[] { null }))) + ); + assertEquals( + mapperExceptionNullVectorValue.getMessage(), + "failed to parse field [field] of type [dense_vector] in document with id '1'. Preview of field's value: 'null'" + ); + } + + public void testNullVectorValue() throws Exception { + DocumentMapper mapper = createDocumentMapper( + fieldMapping( + b -> b.field("type", "dense_vector") + .field("dimension", 2) + .field( + "knn", + Map.of( + "metric", + "L2", + "algorithm", + Map.of("name", "HNSW", "parameters", Map.of("max_connections", 16, "beam_width", 100)) + ) + ) + ) + ); + mapper.parse(source(b -> b.field("field", (Float) null))); + mapper.parse(source(b -> b.field("field", VECTOR))); + mapper.parse(source(b -> b.field("field", (Float) null))); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/DenseVectorFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DenseVectorFieldTypeTests.java index 31bfce1beaa5c..7ae117881a43f 100644 --- a/server/src/test/java/org/opensearch/index/mapper/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/DenseVectorFieldTypeTests.java @@ -7,458 +7,32 @@ import org.junit.Before; import org.mockito.Mockito; -import org.opensearch.common.CheckedConsumer; -import org.opensearch.common.Strings; -import org.opensearch.common.bytes.BytesReference; -import org.opensearch.common.compress.CompressedXContent; -import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.common.xcontent.XContentType; -import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.index.IndexService; +import org.opensearch.common.unit.Fuzziness; +import org.opensearch.index.mapper.DenseVectorFieldMapper.DenseVectorFieldType; import org.opensearch.index.query.QueryShardContext; -import org.opensearch.index.query.QueryShardException; -import org.opensearch.test.OpenSearchSingleNodeTestCase; -import java.io.IOException; import java.util.Arrays; -import java.util.HashMap; import java.util.Map; -import java.util.stream.IntStream; -import static org.hamcrest.Matchers.containsString; -import static org.opensearch.index.mapper.FieldTypeTestCase.MOCK_QSC_DISALLOW_EXPENSIVE; import static org.opensearch.index.mapper.KnnAlgorithmContext.Method.HNSW; import static org.opensearch.index.mapper.KnnAlgorithmContextFactory.HNSW_PARAMETER_BEAM_WIDTH; import static org.opensearch.index.mapper.KnnAlgorithmContextFactory.HNSW_PARAMETER_MAX_CONNECTIONS; -public class DenseVectorFieldTypeTests extends OpenSearchSingleNodeTestCase { - private static final String ALGORITHM_HNSW = "HNSW"; - private static final String DENSE_VECTOR_TYPE_NAME = "dense_vector"; - private static final int DIMENSION = 2; +public class DenseVectorFieldTypeTests extends FieldTypeTestCase { + private static final String FIELD_NAME = "field"; - private static final String METRIC_L2 = "L2"; private static final float[] VECTOR = { 2.0f, 4.5f }; - private IndexService indexService; - private DocumentMapperParser parser; - private MappedFieldType fieldType; + private DenseVectorFieldType fieldType; @Before public void setup() throws Exception { - indexService = createIndex("test"); - parser = indexService.mapperService().documentMapperParser(); - KnnAlgorithmContext knnMethodContext = new KnnAlgorithmContext( HNSW, Map.of(HNSW_PARAMETER_MAX_CONNECTIONS, 10, HNSW_PARAMETER_BEAM_WIDTH, 100) ); KnnContext knnContext = new KnnContext(Metric.L2, knnMethodContext); - fieldType = new DenseVectorFieldMapper.DenseVectorFieldType(FIELD_NAME, 1, knnContext); - } - - public void testIndexingWithoutEnablingKnn() throws IOException { - XContentBuilder mappingAllDefaults = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .endObject() - .endObject() - .endObject() - .endObject(); - parser.parse("type", new CompressedXContent(Strings.toString(mappingAllDefaults))).parse(source(b -> b.field(FIELD_NAME, VECTOR))); - } - - public void testIndexingWithDefaultParams() throws IOException { - XContentBuilder mappingAllDefaults = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("knn", Map.of()) - .endObject() - .endObject() - .endObject() - .endObject(); - parser.parse("type", new CompressedXContent(Strings.toString(mappingAllDefaults))).parse(source(b -> b.field(FIELD_NAME, VECTOR))); - } - - public void testIndexingWithAlgorithmParameters() throws IOException { - XContentBuilder mapping = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field( - "knn", - Map.of( - "metric", - METRIC_L2, - "algorithm", - Map.of("name", ALGORITHM_HNSW, "parameters", Map.of("beam_width", 256, "max_connections", 16)) - ) - ) - .endObject() - .endObject() - .endObject() - .endObject(); - parser.parse("type", new CompressedXContent(Strings.toString(mapping))); - } - - public void testCosineMetric() throws IOException { - XContentBuilder mappingCosineMetric = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("knn", Map.of("metric", "cosine", "algorithm", Map.of("name", ALGORITHM_HNSW))) - .endObject() - .endObject() - .endObject() - .endObject(); - parser.parse("type", new CompressedXContent(Strings.toString(mappingCosineMetric))).parse(source(b -> b.field(FIELD_NAME, VECTOR))); - } - - public void testDotProductMetric() throws IOException { - XContentBuilder mappingDotProductMetric = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("knn", Map.of("metric", "dot_product", "algorithm", Map.of("name", ALGORITHM_HNSW))) - .endObject() - .endObject() - .endObject() - .endObject(); - parser.parse("type", new CompressedXContent(Strings.toString(mappingDotProductMetric))) - .parse(source(b -> b.field(FIELD_NAME, VECTOR))); - } - - public void testHNSWAlgorithmParametersInvalidInput() throws Exception { - XContentBuilder mappingInvalidMaxConnections = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field( - "knn", - Map.of( - "metric", - METRIC_L2, - "algorithm", - Map.of("name", ALGORITHM_HNSW, "parameters", Map.of("beam_width", 256, "max_connections", 50)) - ) - ) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionInvalidMaxConnections = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingInvalidMaxConnections))) - ); - org.hamcrest.MatcherAssert.assertThat( - mapperExceptionInvalidMaxConnections.getMessage(), - containsString("max_connections value cannot be greater than") - ); - - XContentBuilder mappingInvalidBeamWidth = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field( - "knn", - Map.of( - "metric", - METRIC_L2, - "algorithm", - Map.of("name", ALGORITHM_HNSW, "parameters", Map.of("beam_width", 1024, "max_connections", 6)) - ) - ) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionInvalidmBeamWidth = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingInvalidBeamWidth))) - ); - org.hamcrest.MatcherAssert.assertThat( - mapperExceptionInvalidmBeamWidth.getMessage(), - containsString("beam_width value cannot be greater than") - ); - - XContentBuilder mappingUnsupportedParam = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field( - "knn", - Map.of( - "metric", - METRIC_L2, - "algorithm", - Map.of("name", ALGORITHM_HNSW, "parameters", Map.of("beam_width", 256, "max_connections", 6, "some_param", 23)) - ) - ) - .endObject() - .endObject() - .endObject() - .endObject(); - - final IllegalArgumentException mapperExceptionUnsupportedParam = expectThrows( - IllegalArgumentException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingUnsupportedParam))) - ); - assertEquals(mapperExceptionUnsupportedParam.getMessage(), "Algorithm parameter [some_param] is not supported"); - } - - public void testInvalidVectorDimension() throws Exception { - XContentBuilder mappingMissingDimension = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("knn", Map.of()) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionMissingDimension = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingMissingDimension))) - ); - org.hamcrest.MatcherAssert.assertThat( - mapperExceptionMissingDimension.getMessage(), - containsString("[dimension] property must be specified for field") - ); - - XContentBuilder mappingInvalidDimension = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", 1200) - .field("knn", Map.of()) - .endObject() - .endObject() - .endObject() - .endObject(); - - final IllegalArgumentException exceptionInvalidDimension = expectThrows( - IllegalArgumentException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingInvalidDimension))) - ); - assertEquals(exceptionInvalidDimension.getMessage(), "[dimension] value 1200 cannot be greater than 1024 for vector [field]"); - - XContentBuilder mappingDimentionsMismatch = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("knn", Map.of()) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionIDimentionsMismatch = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingDimentionsMismatch))) - .parse(source(b -> b.field(FIELD_NAME, new float[] { 2.0f, 4.5f, 5.6f }))) - ); - org.hamcrest.MatcherAssert.assertThat( - mapperExceptionIDimentionsMismatch.getMessage(), - containsString("failed to parse field [field] of type [dense_vector]") - ); - } - - public void testInvalidMetric() throws Exception { - XContentBuilder mappingInvalidMetric = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("knn", Map.of("metric", "LAMBDA", "algorithm", Map.of("name", ALGORITHM_HNSW))) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionInvalidMetric = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingInvalidMetric))) - ); - org.hamcrest.MatcherAssert.assertThat( - mapperExceptionInvalidMetric.getMessage(), - containsString("[metric] value [LAMBDA] is invalid") - ); - } - - public void testInvalidAlgorithm() throws Exception { - XContentBuilder mappingInvalidAlgorithm = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("knn", Map.of("metric", METRIC_L2, "algorithm", Map.of("name", "MY_ALGORITHM"))) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionInvalidAlgorithm = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingInvalidAlgorithm))) - ); - assertEquals(mapperExceptionInvalidAlgorithm.getMessage(), "[algorithm name] value [MY_ALGORITHM] is invalid or not supported"); - } - - public void testInvalidParams() throws Exception { - XContentBuilder mappingInvalidMaxConnections = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("my_field", "some_value") - .field("knn", Map.of()) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionInvalidMaxConnections = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingInvalidMaxConnections))) - ); - assertEquals( - mapperExceptionInvalidMaxConnections.getMessage(), - "unknown parameter [my_field] on mapper [field] of type [dense_vector]" - ); - } - - public void testExceedMaxNumberOfAlgorithmParams() throws Exception { - Map algorithmParams = new HashMap<>(); - IntStream.range(0, 100).forEach(number -> algorithmParams.put("param" + number, randomInt(Integer.MAX_VALUE))); - XContentBuilder mappingInvalidAlgorithm = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("knn", Map.of("metric", METRIC_L2, "algorithm", Map.of("name", ALGORITHM_HNSW, "parameters", algorithmParams))) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionInvalidAlgorithm = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mappingInvalidAlgorithm))) - ); - assertEquals( - mapperExceptionInvalidAlgorithm.getMessage(), - "Invalid number of parameters for [algorithm], max allowed is [50] but given [100]" - ); - } - - public void testInvalidVectorNumberFormat() throws Exception { - XContentBuilder mapping = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", 1) - .field("knn", Map.of()) - .endObject() - .endObject() - .endObject() - .endObject(); - - final MapperParsingException mapperExceptionStringAsVectorValue = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mapping))) - .parse(source(b -> b.field(FIELD_NAME, "some malicious script content"))) - ); - assertEquals( - mapperExceptionStringAsVectorValue.getMessage(), - "failed to parse field [field] of type [dense_vector] in document with id '1'. Preview of field's value: 'some malicious script content'" - ); - - final MapperParsingException mapperExceptionInfinityVectorValue = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mapping))) - .parse(source(b -> b.field(FIELD_NAME, new Float[] { Float.POSITIVE_INFINITY }))) - ); - assertEquals( - mapperExceptionInfinityVectorValue.getMessage(), - "failed to parse field [field] of type [dense_vector] in document with id '1'. Preview of field's value: 'Infinity'" - ); - - final MapperParsingException mapperExceptionNullVectorValue = expectThrows( - MapperParsingException.class, - () -> parser.parse("type", new CompressedXContent(Strings.toString(mapping))) - .parse(source(b -> b.field(FIELD_NAME, new Float[] { null }))) - ); - assertEquals( - mapperExceptionNullVectorValue.getMessage(), - "failed to parse field [field] of type [dense_vector] in document with id '1'. Preview of field's value: 'null'" - ); - } - - public void testNullVectorValue() throws Exception { - XContentBuilder mapping = XContentFactory.jsonBuilder() - .startObject() - .startObject("type") - .startObject("properties") - .startObject(FIELD_NAME) - .field("type", DENSE_VECTOR_TYPE_NAME) - .field("dimension", DIMENSION) - .field("knn", Map.of()) - .endObject() - .endObject() - .endObject() - .endObject(); - - parser.parse("type", new CompressedXContent(Strings.toString(mapping))).parse(source(b -> b.field(FIELD_NAME, (Float) null))); - - parser.parse("type", new CompressedXContent(Strings.toString(mapping))).parse(source(b -> b.field(FIELD_NAME, VECTOR))); - - parser.parse("type", new CompressedXContent(Strings.toString(mapping))).parse(source(b -> b.field(FIELD_NAME, (Float) null))); + fieldType = new DenseVectorFieldType(FIELD_NAME, 1, knnContext); } public void testValueDisplay() { @@ -470,11 +44,7 @@ public void testValueDisplay() { Metric.L2, KnnAlgorithmContextFactory.defaultContext(KnnAlgorithmContext.Method.HNSW) ); - MappedFieldType ftDefaultAlgorithmContext = new DenseVectorFieldMapper.DenseVectorFieldType( - FIELD_NAME, - 1, - knnContextDEfaultAlgorithmContext - ); + MappedFieldType ftDefaultAlgorithmContext = new DenseVectorFieldType(FIELD_NAME, 1, knnContextDEfaultAlgorithmContext); Object actualFloatArrayDefaultAlgorithmContext = ftDefaultAlgorithmContext.valueForDisplay(VECTOR); assertTrue(actualFloatArrayDefaultAlgorithmContext instanceof float[]); assertArrayEquals(VECTOR, (float[]) actualFloatArrayDefaultAlgorithmContext, 0.0f); @@ -482,47 +52,42 @@ public void testValueDisplay() { public void testTermQueryNotSupported() { QueryShardContext context = Mockito.mock(QueryShardContext.class); - QueryShardException exception = expectThrows(QueryShardException.class, () -> fieldType.termsQuery(Arrays.asList(VECTOR), context)); - assertEquals(exception.getMessage(), "Dense_vector does not support exact searching, use KNN queries instead [field]"); + UnsupportedOperationException exception = expectThrows( + UnsupportedOperationException.class, + () -> fieldType.termsQuery(Arrays.asList(VECTOR), context) + ); + assertEquals(exception.getMessage(), "[term] queries are not supported on [dense_vector] fields."); } public void testPrefixQueryNotSupported() { - QueryShardException ee = expectThrows( - QueryShardException.class, + UnsupportedOperationException ee = expectThrows( + UnsupportedOperationException.class, () -> fieldType.prefixQuery("foo*", null, MOCK_QSC_DISALLOW_EXPENSIVE) ); - assertEquals( - "Can only use prefix queries on keyword, text and wildcard fields - not on [field] which is of type [dense_vector]", - ee.getMessage() - ); + assertEquals("[prefix] queries are not supported on [dense_vector] fields.", ee.getMessage()); } public void testRegexpQueryNotSupported() { - QueryShardException ee = expectThrows( - QueryShardException.class, + UnsupportedOperationException ee = expectThrows( + UnsupportedOperationException.class, () -> fieldType.regexpQuery("foo?", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE) ); - assertEquals( - "Can only use regexp queries on keyword and text fields - not on [field] which is of type [dense_vector]", - ee.getMessage() - ); + assertEquals("[regexp] queries are not supported on [dense_vector] fields.", ee.getMessage()); } public void testWildcardQueryNotSupported() { - QueryShardException ee = expectThrows( - QueryShardException.class, + UnsupportedOperationException ee = expectThrows( + UnsupportedOperationException.class, () -> fieldType.wildcardQuery("valu*", null, MOCK_QSC_DISALLOW_EXPENSIVE) ); - assertEquals( - "Can only use wildcard queries on keyword, text and wildcard fields - not on [field] which is of type [dense_vector]", - ee.getMessage() - ); + assertEquals("[wildcard] queries are not supported on [dense_vector] fields.", ee.getMessage()); } - private final SourceToParse source(CheckedConsumer build) throws IOException { - XContentBuilder builder = JsonXContent.contentBuilder().startObject(); - build.accept(builder); - builder.endObject(); - return new SourceToParse("test", "1", BytesReference.bytes(builder), XContentType.JSON); + public void testFuzzyQuery() { + UnsupportedOperationException e = expectThrows( + UnsupportedOperationException.class, + () -> fieldType.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, randomMockShardContext()) + ); + assertEquals("[fuzzy] queries are not supported on [dense_vector] fields.", e.getMessage()); } } diff --git a/server/src/test/java/org/opensearch/index/mapper/DenseVectorMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DenseVectorMapperTests.java deleted file mode 100644 index db663029f6139..0000000000000 --- a/server/src/test/java/org/opensearch/index/mapper/DenseVectorMapperTests.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.index.mapper; - -import org.opensearch.common.Strings; -import org.opensearch.common.xcontent.ToXContent; -import org.opensearch.common.xcontent.XContentBuilder; -import org.opensearch.common.xcontent.XContentFactory; -import org.opensearch.common.xcontent.json.JsonXContent; -import org.opensearch.index.mapper.DenseVectorFieldMapper.DenseVectorFieldType; - -import java.io.IOException; -import java.util.Map; -import java.util.Set; - -import static org.hamcrest.Matchers.containsString; -import static org.opensearch.index.mapper.KnnAlgorithmContext.Method.HNSW; -import static org.opensearch.index.mapper.KnnAlgorithmContextFactory.HNSW_PARAMETER_BEAM_WIDTH; -import static org.opensearch.index.mapper.KnnAlgorithmContextFactory.HNSW_PARAMETER_MAX_CONNECTIONS; - -public class DenseVectorMapperTests extends MapperServiceTestCase { - - private static final float[] VECTOR = { 2.0f, 4.5f }; - - public void testValueDisplay() { - KnnAlgorithmContext knnMethodContext = new KnnAlgorithmContext( - HNSW, - Map.of(HNSW_PARAMETER_MAX_CONNECTIONS, 16, HNSW_PARAMETER_BEAM_WIDTH, 100) - ); - KnnContext knnContext = new KnnContext(Metric.L2, knnMethodContext); - MappedFieldType ft = new DenseVectorFieldType("field", 1, knnContext); - Object actualFloatArray = ft.valueForDisplay(VECTOR); - assertTrue(actualFloatArray instanceof float[]); - assertArrayEquals(VECTOR, (float[]) actualFloatArray, 0.0f); - } - - public void testSerializationWithoutKnn() throws IOException { - DocumentMapper defaultMapper = createDocumentMapper(fieldMapping(this::minimalMapping)); - Mapper mapper = defaultMapper.mappers().getMapper("field"); - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - mapper.toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endObject(); - assertEquals("{\"field\":{\"type\":\"dense_vector\",\"dimension\":2}}", Strings.toString(builder)); - } - - public void testSerializationWithKnn() throws IOException { - DocumentMapper defaultMapper = createDocumentMapper(fieldMapping(b -> { - minimalMapping(b); - b.field("knn", Map.of()); - })); - Mapper mapper = defaultMapper.mappers().getMapper("field"); - XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - mapper.toXContent(builder, ToXContent.EMPTY_PARAMS); - builder.endObject(); - assertTrue( - Set.of( - "{\"field\":{\"type\":\"dense_vector\"," - + "\"dimension\":2," - + "\"knn\":" - + "{\"metric\":\"L2\"," - + "\"algorithm\":{" - + "\"name\":\"HNSW\"," - + "\"parameters\":{\"beam_width\":100,\"max_connections\":16}}}}}", - "{\"field\":{\"type\":\"dense_vector\"," - + "\"dimension\":2," - + "\"knn\":" - + "{\"metric\":\"L2\"," - + "\"algorithm\":{" - + "\"name\":\"HNSW\"," - + "\"parameters\":{\"max_connections\":16,\"beam_width\":100}}}}}" - ).contains(Strings.toString(builder)) - ); - } - - public void testMinimalToMaximal() throws IOException { - XContentBuilder orig = JsonXContent.contentBuilder().startObject(); - createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, INCLUDE_DEFAULTS); - orig.endObject(); - XContentBuilder parsedFromOrig = JsonXContent.contentBuilder().startObject(); - createMapperService(orig).documentMapper().mapping().toXContent(parsedFromOrig, INCLUDE_DEFAULTS); - parsedFromOrig.endObject(); - assertEquals(Strings.toString(orig), Strings.toString(parsedFromOrig)); - } - - public void testDeprecatedBoost() throws IOException { - createMapperService(fieldMapping(b -> { - minimalMapping(b); - b.field("boost", 2.0); - })); - String type = typeName(); - String[] warnings = new String[] { - "Parameter [boost] on field [field] is deprecated and will be removed in 8.0", - "Parameter [boost] has no effect on type [" + type + "] and will be removed in future" }; - allowedWarnings(warnings); - } - - public void testIfMinimalSerializesToItself() throws IOException { - XContentBuilder orig = JsonXContent.contentBuilder().startObject(); - createMapperService(fieldMapping(this::minimalMapping)).documentMapper().mapping().toXContent(orig, ToXContent.EMPTY_PARAMS); - orig.endObject(); - XContentBuilder parsedFromOrig = JsonXContent.contentBuilder().startObject(); - createMapperService(orig).documentMapper().mapping().toXContent(parsedFromOrig, ToXContent.EMPTY_PARAMS); - parsedFromOrig.endObject(); - assertEquals(Strings.toString(orig), Strings.toString(parsedFromOrig)); - } - - public void testForEmptyName() { - MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(mapping(b -> { - b.startObject(""); - minimalMapping(b); - b.endObject(); - }))); - assertThat(e.getMessage(), containsString("name cannot be empty string")); - } - - protected void writeFieldValue(XContentBuilder b) throws IOException { - b.value(new float[] { 2.5f }); - } - - protected void minimalMapping(XContentBuilder b) throws IOException { - b.field("type", "dense_vector"); - b.field("dimension", 2); - // b.field("knn", Map.of()); - } - - protected void registerParameters(MapperTestCase.ParameterChecker checker) throws IOException { - checker.registerConflictCheck("doc_values", b -> b.field("doc_values", false)); - checker.registerConflictCheck("index", b -> b.field("index", false)); - checker.registerConflictCheck("store", b -> b.field("store", false)); - } - - protected String typeName() throws IOException { - MapperService ms = createMapperService(fieldMapping(this::minimalMapping)); - return ms.fieldType("field").typeName(); - } -}