From 1be5e3aa1a73a4bbc10d53d7dcc8280ee61b576f Mon Sep 17 00:00:00 2001 From: Bharathwaj G Date: Mon, 8 Jul 2024 19:31:58 +0530 Subject: [PATCH] Star tree codec changes (#14514) --------- Signed-off-by: Bharathwaj G --- .../opensearch/index/codec/CodecService.java | 16 ++- .../codec/composite/Composite99Codec.java | 57 +++++++++ .../composite/Composite99DocValuesFormat.java | 64 ++++++++++ .../composite/Composite99DocValuesReader.java | 89 ++++++++++++++ .../composite/Composite99DocValuesWriter.java | 114 ++++++++++++++++++ .../composite/CompositeCodecFactory.java | 42 +++++++ .../codec/composite/CompositeIndexReader.java | 34 ++++++ .../codec/composite/CompositeIndexValues.java | 21 ++++ .../datacube/startree/StarTreeValues.java | 35 ++++++ .../datacube/startree/package-info.java | 12 ++ .../index/codec/composite/package-info.java | 12 ++ .../mapper/CompositeMappedFieldType.java | 3 + .../index/mapper/MapperService.java | 9 ++ .../services/org.apache.lucene.codecs.Codec | 1 + .../opensearch/index/codec/CodecTests.java | 47 ++++++++ .../StarTreeDocValuesFormatTests.java | 110 +++++++++++++++++ 16 files changed, 662 insertions(+), 4 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/Composite99Codec.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/CompositeCodecFactory.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexReader.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexValues.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java create mode 100644 server/src/main/java/org/opensearch/index/codec/composite/package-info.java create mode 100644 server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec create mode 100644 server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java diff --git a/server/src/main/java/org/opensearch/index/codec/CodecService.java b/server/src/main/java/org/opensearch/index/codec/CodecService.java index 67f38536a0d11..59fafdf1ba74e 100644 --- a/server/src/main/java/org/opensearch/index/codec/CodecService.java +++ b/server/src/main/java/org/opensearch/index/codec/CodecService.java @@ -39,6 +39,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.collect.MapBuilder; import org.opensearch.index.IndexSettings; +import org.opensearch.index.codec.composite.CompositeCodecFactory; import org.opensearch.index.mapper.MapperService; import java.util.Map; @@ -63,6 +64,7 @@ public class CodecService { * the raw unfiltered lucene default. useful for testing */ public static final String LUCENE_DEFAULT_CODEC = "lucene_default"; + private final CompositeCodecFactory compositeCodecFactory = new CompositeCodecFactory(); public CodecService(@Nullable MapperService mapperService, IndexSettings indexSettings, Logger logger) { final MapBuilder codecs = MapBuilder.newMapBuilder(); @@ -73,10 +75,16 @@ public CodecService(@Nullable MapperService mapperService, IndexSettings indexSe codecs.put(BEST_COMPRESSION_CODEC, new Lucene99Codec(Mode.BEST_COMPRESSION)); codecs.put(ZLIB, new Lucene99Codec(Mode.BEST_COMPRESSION)); } else { - codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); - codecs.put(LZ4, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); - codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); - codecs.put(ZLIB, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); + // CompositeCodec still delegates to PerFieldMappingPostingFormatCodec + // We can still support all the compression codecs when composite index is present + if (mapperService.isCompositeIndexPresent()) { + codecs.putAll(compositeCodecFactory.getCompositeIndexCodecs(mapperService, logger)); + } else { + codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); + codecs.put(LZ4, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); + codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); + codecs.put(ZLIB, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); + } } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99Codec.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99Codec.java new file mode 100644 index 0000000000000..de04944e67cd2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99Codec.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec; +import org.opensearch.index.mapper.MapperService; + +/** + * Extends the Codec to support new file formats for composite indices eg: star tree index + * based on the mappings. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class Composite99Codec extends FilterCodec { + public static final String COMPOSITE_INDEX_CODEC_NAME = "Composite99Codec"; + private final MapperService mapperService; + + // needed for SPI - this is used in reader path + public Composite99Codec() { + this(COMPOSITE_INDEX_CODEC_NAME, new Lucene99Codec(), null); + } + + public Composite99Codec(Lucene99Codec.Mode compressionMode, MapperService mapperService, Logger logger) { + this(COMPOSITE_INDEX_CODEC_NAME, new PerFieldMappingPostingFormatCodec(compressionMode, mapperService, logger), mapperService); + } + + /** + * Sole constructor. When subclassing this codec, create a no-arg ctor and pass the delegate codec and a unique name to + * this ctor. + * + * @param name name of the codec + * @param delegate codec delegate + * @param mapperService mapper service instance + */ + protected Composite99Codec(String name, Codec delegate, MapperService mapperService) { + super(name, delegate); + this.mapperService = mapperService; + } + + @Override + public DocValuesFormat docValuesFormat() { + return new Composite99DocValuesFormat(mapperService); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java new file mode 100644 index 0000000000000..216ed4f68f333 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesFormat.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MapperService; + +import java.io.IOException; + +/** + * DocValues format to handle composite indices + * + * @opensearch.experimental + */ +@ExperimentalApi +public class Composite99DocValuesFormat extends DocValuesFormat { + /** + * Creates a new docvalues format. + * + *

The provided name will be written into the index segment in some configurations (such as + * when using {@code PerFieldDocValuesFormat}): in such configurations, for the segment to be read + * this class should be registered with Java's SPI mechanism (registered in META-INF/ of your jar + * file, etc). + */ + private final DocValuesFormat delegate; + private final MapperService mapperService; + + // needed for SPI + public Composite99DocValuesFormat() { + this(new Lucene90DocValuesFormat(), null); + } + + public Composite99DocValuesFormat(MapperService mapperService) { + this(new Lucene90DocValuesFormat(), mapperService); + } + + public Composite99DocValuesFormat(DocValuesFormat delegate, MapperService mapperService) { + super(delegate.getName()); + this.delegate = delegate; + this.mapperService = mapperService; + } + + @Override + public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + return new Composite99DocValuesWriter(delegate.fieldsConsumer(state), state, mapperService); + } + + @Override + public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException { + return new Composite99DocValuesReader(delegate.fieldsProducer(state), state); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java new file mode 100644 index 0000000000000..82c844088cfd4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesReader.java @@ -0,0 +1,89 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.List; + +/** + * Reader for star tree index and star tree doc values from the segments + * + * @opensearch.experimental + */ +@ExperimentalApi +public class Composite99DocValuesReader extends DocValuesProducer implements CompositeIndexReader { + private DocValuesProducer delegate; + + public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState state) throws IOException { + this.delegate = producer; + // TODO : read star tree files + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + return delegate.getNumeric(field); + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + return delegate.getBinary(field); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + return delegate.getSorted(field); + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + return delegate.getSortedNumeric(field); + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + return delegate.getSortedSet(field); + } + + @Override + public void checkIntegrity() throws IOException { + delegate.checkIntegrity(); + // Todo : check integrity of composite index related [star tree] files + } + + @Override + public void close() throws IOException { + delegate.close(); + // Todo: close composite index related files [star tree] files + } + + @Override + public List getCompositeIndexFields() { + // todo : read from file formats and get the field names. + throw new UnsupportedOperationException(); + + } + + @Override + public CompositeIndexValues getCompositeIndexValues(String field, CompositeMappedFieldType.CompositeFieldType fieldType) + throws IOException { + // TODO : read compositeIndexValues [starTreeValues] from star tree files + throw new UnsupportedOperationException(); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java new file mode 100644 index 0000000000000..75bbf78dbdad2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite99DocValuesWriter.java @@ -0,0 +1,114 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite; + +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentWriteState; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.CompositeMappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.StarTreeMapper; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; + +/** + * This class write the star tree index and star tree doc values + * based on the doc values structures of the original index + * + * @opensearch.experimental + */ +@ExperimentalApi +public class Composite99DocValuesWriter extends DocValuesConsumer { + private final DocValuesConsumer delegate; + private final SegmentWriteState state; + private final MapperService mapperService; + AtomicReference mergeState = new AtomicReference<>(); + private final Set compositeMappedFieldTypes; + private final Set compositeFieldSet; + + private final Map fieldProducerMap = new HashMap<>(); + + public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService) { + + this.delegate = delegate; + this.state = segmentWriteState; + this.mapperService = mapperService; + this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes(); + compositeFieldSet = new HashSet<>(); + for (CompositeMappedFieldType type : compositeMappedFieldTypes) { + compositeFieldSet.addAll(type.fields()); + } + } + + @Override + public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addNumericField(field, valuesProducer); + } + + @Override + public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addBinaryField(field, valuesProducer); + } + + @Override + public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addSortedField(field, valuesProducer); + } + + @Override + public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addSortedNumericField(field, valuesProducer); + // Perform this only during flush flow + if (mergeState.get() == null) { + createCompositeIndicesIfPossible(valuesProducer, field); + } + } + + @Override + public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { + delegate.addSortedSetField(field, valuesProducer); + } + + @Override + public void close() throws IOException { + delegate.close(); + } + + private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer, FieldInfo field) throws IOException { + if (compositeFieldSet.isEmpty()) return; + if (compositeFieldSet.contains(field.name)) { + fieldProducerMap.put(field.name, valuesProducer); + compositeFieldSet.remove(field.name); + } + // we have all the required fields to build composite fields + if (compositeFieldSet.isEmpty()) { + for (CompositeMappedFieldType mappedType : compositeMappedFieldTypes) { + if (mappedType instanceof StarTreeMapper.StarTreeFieldType) { + // TODO : Call StarTree builder + } + } + } + } + + @Override + public void merge(MergeState mergeState) throws IOException { + this.mergeState.compareAndSet(null, mergeState); + super.merge(mergeState); + // TODO : handle merge star tree + // mergeStarTreeFields(mergeState); + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/CompositeCodecFactory.java b/server/src/main/java/org/opensearch/index/codec/composite/CompositeCodecFactory.java new file mode 100644 index 0000000000000..3acedc6a27d7f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/CompositeCodecFactory.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite; + +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MapperService; + +import java.util.HashMap; +import java.util.Map; + +import static org.opensearch.index.codec.CodecService.BEST_COMPRESSION_CODEC; +import static org.opensearch.index.codec.CodecService.DEFAULT_CODEC; +import static org.opensearch.index.codec.CodecService.LZ4; +import static org.opensearch.index.codec.CodecService.ZLIB; + +/** + * Factory class to return the latest composite codec for all the modes + * + * @opensearch.experimental + */ +@ExperimentalApi +public class CompositeCodecFactory { + public CompositeCodecFactory() {} + + public Map getCompositeIndexCodecs(MapperService mapperService, Logger logger) { + Map codecs = new HashMap<>(); + codecs.put(DEFAULT_CODEC, new Composite99Codec(Lucene99Codec.Mode.BEST_SPEED, mapperService, logger)); + codecs.put(LZ4, new Composite99Codec(Lucene99Codec.Mode.BEST_SPEED, mapperService, logger)); + codecs.put(BEST_COMPRESSION_CODEC, new Composite99Codec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, logger)); + codecs.put(ZLIB, new Composite99Codec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, logger)); + return codecs; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexReader.java b/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexReader.java new file mode 100644 index 0000000000000..d02438b75377d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexReader.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.List; + +/** + * Interface that abstracts the functionality to read composite index structures from the segment + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface CompositeIndexReader { + /** + * Get list of composite index fields from the segment + * + */ + List getCompositeIndexFields(); + + /** + * Get composite index values based on the field name and the field type + */ + CompositeIndexValues getCompositeIndexValues(String field, CompositeMappedFieldType.CompositeFieldType fieldType) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexValues.java b/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexValues.java new file mode 100644 index 0000000000000..f8848aceab343 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/CompositeIndexValues.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite; + +import org.opensearch.common.annotation.ExperimentalApi; + +/** + * Interface for composite index values + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface CompositeIndexValues { + CompositeIndexValues getValues(); +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java new file mode 100644 index 0000000000000..2a5b96ce2620a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeValues.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite.datacube.startree; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.codec.composite.CompositeIndexValues; + +import java.util.List; + +/** + * Concrete class that holds the star tree associated values from the segment + * + * @opensearch.experimental + */ +@ExperimentalApi +public class StarTreeValues implements CompositeIndexValues { + private final List dimensionsOrder; + + // TODO : come up with full set of vales such as dimensions and metrics doc values + star tree + public StarTreeValues(List dimensionsOrder) { + super(); + this.dimensionsOrder = List.copyOf(dimensionsOrder); + } + + @Override + public CompositeIndexValues getValues() { + return this; + } +} diff --git a/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java new file mode 100644 index 0000000000000..67808ad51289a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/datacube/startree/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * classes responsible for handling all star tree structures and operations as part of codec + */ +package org.opensearch.index.codec.composite.datacube.startree; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/package-info.java b/server/src/main/java/org/opensearch/index/codec/composite/package-info.java new file mode 100644 index 0000000000000..5d15e99c00975 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/codec/composite/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * classes responsible for handling all composite index codecs and operations + */ +package org.opensearch.index.codec.composite; diff --git a/server/src/main/java/org/opensearch/index/mapper/CompositeMappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/CompositeMappedFieldType.java index f52ce29a86dd2..e067e70621304 100644 --- a/server/src/main/java/org/opensearch/index/mapper/CompositeMappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/CompositeMappedFieldType.java @@ -45,7 +45,10 @@ public CompositeMappedFieldType(String name, List fields, CompositeField /** * Supported composite field types + * + * @opensearch.experimental */ + @ExperimentalApi public enum CompositeFieldType { STAR_TREE("star_tree"); diff --git a/server/src/main/java/org/opensearch/index/mapper/MapperService.java b/server/src/main/java/org/opensearch/index/mapper/MapperService.java index c2e7411a3b47a..530a3092a5aa7 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/opensearch/index/mapper/MapperService.java @@ -226,6 +226,8 @@ public enum MergeReason { private final BooleanSupplier idFieldDataEnabled; + private volatile Set compositeMappedFieldTypes; + public MapperService( IndexSettings indexSettings, IndexAnalyzers indexAnalyzers, @@ -542,6 +544,9 @@ private synchronized Map internalMerge(DocumentMapper ma } assert results.values().stream().allMatch(this::assertSerialization); + + // initialize composite fields post merge + this.compositeMappedFieldTypes = getCompositeFieldTypesFromMapper(); return results; } @@ -655,6 +660,10 @@ public boolean isCompositeIndexPresent() { } public Set getCompositeFieldTypes() { + return compositeMappedFieldTypes; + } + + private Set getCompositeFieldTypesFromMapper() { Set compositeMappedFieldTypes = new HashSet<>(); if (this.mapper == null) { return Collections.emptySet(); diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec new file mode 100644 index 0000000000000..e030a813373c1 --- /dev/null +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -0,0 +1 @@ +org.opensearch.index.codec.composite.Composite99Codec diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index b31edd79411d0..7146b7dc51753 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -48,6 +48,7 @@ import org.opensearch.env.Environment; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.codec.composite.Composite99Codec; import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.similarity.SimilarityService; @@ -59,6 +60,8 @@ import java.io.IOException; import java.util.Collections; +import org.mockito.Mockito; + import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING; import static org.hamcrest.Matchers.instanceOf; @@ -76,23 +79,52 @@ public void testDefault() throws Exception { assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_SPEED, codec); } + public void testDefaultWithCompositeIndex() throws Exception { + Codec codec = createCodecService(false, true).codec("default"); + assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_SPEED, codec); + assert codec instanceof Composite99Codec; + } + public void testBestCompression() throws Exception { Codec codec = createCodecService(false).codec("best_compression"); assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_COMPRESSION, codec); } + public void testBestCompressionWithCompositeIndex() throws Exception { + Codec codec = createCodecService(false, true).codec("best_compression"); + assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_COMPRESSION, codec); + assert codec instanceof Composite99Codec; + } + public void testLZ4() throws Exception { Codec codec = createCodecService(false).codec("lz4"); assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_SPEED, codec); assert codec instanceof PerFieldMappingPostingFormatCodec; } + public void testLZ4WithCompositeIndex() throws Exception { + Codec codec = createCodecService(false, true).codec("lz4"); + assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_SPEED, codec); + assert codec instanceof Composite99Codec; + } + public void testZlib() throws Exception { Codec codec = createCodecService(false).codec("zlib"); assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_COMPRESSION, codec); assert codec instanceof PerFieldMappingPostingFormatCodec; } + public void testZlibWithCompositeIndex() throws Exception { + Codec codec = createCodecService(false, true).codec("zlib"); + assertStoredFieldsCompressionEquals(Lucene99Codec.Mode.BEST_COMPRESSION, codec); + assert codec instanceof Composite99Codec; + } + + public void testResolveDefaultCodecsWithCompositeIndex() throws Exception { + CodecService codecService = createCodecService(false, true); + assertThat(codecService.codec("default"), instanceOf(Composite99Codec.class)); + } + public void testBestCompressionWithCompressionLevel() { final Settings settings = Settings.builder() .put(INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getKey(), randomIntBetween(1, 6)) @@ -150,10 +182,17 @@ private void assertStoredFieldsCompressionEquals(Lucene99Codec.Mode expected, Co } private CodecService createCodecService(boolean isMapperServiceNull) throws IOException { + return createCodecService(isMapperServiceNull, false); + } + + private CodecService createCodecService(boolean isMapperServiceNull, boolean isCompositeIndexPresent) throws IOException { Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); if (isMapperServiceNull) { return new CodecService(null, IndexSettingsModule.newIndexSettings("_na", nodeSettings), LogManager.getLogger("test")); } + if (isCompositeIndexPresent) { + return buildCodecServiceWithCompositeIndex(nodeSettings); + } return buildCodecService(nodeSettings); } @@ -176,6 +215,14 @@ private CodecService buildCodecService(Settings nodeSettings) throws IOException return new CodecService(service, indexSettings, LogManager.getLogger("test")); } + private CodecService buildCodecServiceWithCompositeIndex(Settings nodeSettings) throws IOException { + + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); + MapperService service = Mockito.mock(MapperService.class); + Mockito.when(service.isCompositeIndexPresent()).thenReturn(true); + return new CodecService(service, indexSettings, LogManager.getLogger("test")); + } + private SegmentReader getSegmentReader(Codec codec) throws IOException { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(null); diff --git a/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java new file mode 100644 index 0000000000000..6c6d26656e4de --- /dev/null +++ b/server/src/test/java/org/opensearch/index/codec/composite/datacube/startree/StarTreeDocValuesFormatTests.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.codec.composite.datacube.startree; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.opensearch.common.Rounding; +import org.opensearch.index.codec.composite.Composite99Codec; +import org.opensearch.index.compositeindex.datacube.DateDimension; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.StarTreeMapper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import org.mockito.Mockito; + +/** + * Star tree doc values Lucene tests + */ +@LuceneTestCase.SuppressSysoutChecks(bugUrl = "we log a lot on purpose") +public class StarTreeDocValuesFormatTests extends BaseDocValuesFormatTestCase { + @Override + protected Codec getCodec() { + MapperService service = Mockito.mock(MapperService.class); + Mockito.when(service.getCompositeFieldTypes()).thenReturn(Set.of(getStarTreeFieldType())); + final Logger testLogger = LogManager.getLogger(StarTreeDocValuesFormatTests.class); + return new Composite99Codec(Lucene99Codec.Mode.BEST_SPEED, service, testLogger); + } + + private StarTreeMapper.StarTreeFieldType getStarTreeFieldType() { + List m1 = new ArrayList<>(); + m1.add(MetricStat.MAX); + Metric metric = new Metric("sndv", m1); + List d1CalendarIntervals = new ArrayList<>(); + d1CalendarIntervals.add(Rounding.DateTimeUnit.HOUR_OF_DAY); + StarTreeField starTreeField = getStarTreeField(d1CalendarIntervals, metric); + + return new StarTreeMapper.StarTreeFieldType("star_tree", starTreeField); + } + + private static StarTreeField getStarTreeField(List d1CalendarIntervals, Metric metric1) { + DateDimension d1 = new DateDimension("field", d1CalendarIntervals); + NumericDimension d2 = new NumericDimension("dv"); + + List metrics = List.of(metric1); + List dims = List.of(d1, d2); + StarTreeFieldConfiguration config = new StarTreeFieldConfiguration( + 100, + Collections.emptySet(), + StarTreeFieldConfiguration.StarTreeBuildMode.OFF_HEAP + ); + + return new StarTreeField("starTree", dims, metrics, config); + } + + public void testStarTreeDocValues() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + Document doc = new Document(); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + doc.add(new SortedNumericDocValuesField("dv", 1)); + doc.add(new SortedNumericDocValuesField("field", 1)); + iw.addDocument(doc); + doc.add(new SortedNumericDocValuesField("sndv", 1)); + doc.add(new SortedNumericDocValuesField("dv", 1)); + doc.add(new SortedNumericDocValuesField("field", 1)); + iw.addDocument(doc); + iw.forceMerge(1); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedNumericDocValuesField("dv", 2)); + doc.add(new SortedNumericDocValuesField("field", 2)); + iw.addDocument(doc); + doc.add(new SortedNumericDocValuesField("sndv", 2)); + doc.add(new SortedNumericDocValuesField("dv", 2)); + doc.add(new SortedNumericDocValuesField("field", 2)); + iw.addDocument(doc); + iw.forceMerge(1); + iw.close(); + + // TODO : validate star tree structures that got created + directory.close(); + } +}