From 99f008d35b7a6e00aac450678100d55a1ef9c726 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Mon, 8 Jul 2024 21:25:41 +0530 Subject: [PATCH] read for composite index values Signed-off-by: Sarthak Aggarwal --- .../lucene90/StarTree99DocValuesProducer.java | 162 ++++++++++++++++++ .../lucene/index/BaseStarTreeBuilder.java | 1 - .../composite/Composite90DocValuesFormat.java | 10 +- .../composite/Composite90DocValuesReader.java | 129 ++++++++++++-- .../datacube/MergeDimension.java | 56 ++++++ .../aggregators/MetricAggregatorInfo.java | 7 +- .../startree/meta/StarTreeMetadata.java | 80 ++++++++- .../datacube/startree/meta/TreeMetadata.java | 34 ++++ .../startree/node/OffHeapStarTreeNode.java | 6 +- .../startree/utils/StarTreeConstants.java | 28 +++ .../utils/StarTreeDataSerializer.java | 2 +- .../utils/StarTreeMetaSerializer.java | 71 +++++--- 12 files changed, 535 insertions(+), 51 deletions(-) create mode 100644 server/src/main/java/org/apache/lucene/codecs/lucene90/StarTree99DocValuesProducer.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/MergeDimension.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeConstants.java diff --git a/server/src/main/java/org/apache/lucene/codecs/lucene90/StarTree99DocValuesProducer.java b/server/src/main/java/org/apache/lucene/codecs/lucene90/StarTree99DocValuesProducer.java new file mode 100644 index 0000000000000..2d96d5a505694 --- /dev/null +++ b/server/src/main/java/org/apache/lucene/codecs/lucene90/StarTree99DocValuesProducer.java @@ -0,0 +1,162 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.apache.lucene.codecs.lucene90; + +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricEntry; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeConstants; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * This class is a custom abstraction of the {@link DocValuesProducer} for the Star Tree index structure. + * It is responsible for providing access to various types of document values (numeric, binary, sorted, sorted numeric, + * and sorted set) for fields in the Star Tree index. 
+ * + * @opensearch.experimental + */ +public class StarTree99DocValuesProducer extends DocValuesProducer { + + Lucene90DocValuesProducer lucene90DocValuesProducer; + private final List dimensions; + private final List metrics; + private final FieldInfos fieldInfos; + + public StarTree99DocValuesProducer( + SegmentReadState state, + String dataCodec, + String dataExtension, + String metaCodec, + String metaExtension, + List dimensions, + List metricEntries, + String compositeFieldName + ) throws IOException { + this.dimensions = dimensions; + this.metrics = new ArrayList<>(); + for (MetricEntry metricEntry : metricEntries) { + this.metrics.add( + MetricAggregatorInfo.toFieldName(compositeFieldName, metricEntry.getMetricName(), metricEntry.getMetricStat().getTypeName()) + ); + } + + // populates the dummy list of field infos to fetch doc id set iterators for respective fields. + this.fieldInfos = new FieldInfos(getFieldInfoList()); + SegmentReadState segmentReadState = new SegmentReadState(state.directory, state.segmentInfo, fieldInfos, state.context); + lucene90DocValuesProducer = new Lucene90DocValuesProducer(segmentReadState, dataCodec, dataExtension, metaCodec, metaExtension); + } + + @Override + public NumericDocValues getNumeric(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getNumeric(field); + } + + @Override + public BinaryDocValues getBinary(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getBinary(field); + } + + @Override + public SortedDocValues getSorted(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSorted(field); + } + + @Override + public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSortedNumeric(field); + } + + @Override + public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { + return this.lucene90DocValuesProducer.getSortedSet(field); + } + + @Override + 
public void checkIntegrity() throws IOException { + this.lucene90DocValuesProducer.checkIntegrity(); + } + + // returns the doc id set iterator based on field name + public SortedNumericDocValues getSortedNumeric(String fieldName) throws IOException { + return this.lucene90DocValuesProducer.getSortedNumeric(fieldInfos.fieldInfo(fieldName)); + } + + @Override + public void close() throws IOException { + this.lucene90DocValuesProducer.close(); + } + + private FieldInfo[] getFieldInfoList() { + FieldInfo[] fieldInfoList = new FieldInfo[this.dimensions.size() + metrics.size()]; + int fieldNumber = 0; + + for (FieldInfo dimension : this.dimensions) { + fieldInfoList[fieldNumber] = new FieldInfo( + dimension.getName() + StarTreeConstants.DIMENSION_SUFFIX, + fieldNumber, + false, + dimension.omitsNorms(), + dimension.hasPayloads(), + dimension.getIndexOptions(), + dimension.getDocValuesType(), + -1, + dimension.attributes(), + dimension.getPointDimensionCount(), + dimension.getPointIndexDimensionCount(), + dimension.getPointNumBytes(), + dimension.getVectorDimension(), + dimension.getVectorEncoding(), + dimension.getVectorSimilarityFunction(), + false, + dimension.isParentField() + ); + fieldNumber++; + } + for (String metric : metrics) { + fieldInfoList[fieldNumber] = new FieldInfo( + metric + StarTreeConstants.METRIC_SUFFIX, + fieldNumber, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + fieldNumber++; + } + return fieldInfoList; + } + +} diff --git a/server/src/main/java/org/apache/lucene/index/BaseStarTreeBuilder.java b/server/src/main/java/org/apache/lucene/index/BaseStarTreeBuilder.java index 2d0e693bb7c60..07037c188a14c 100644 --- a/server/src/main/java/org/apache/lucene/index/BaseStarTreeBuilder.java +++ 
b/server/src/main/java/org/apache/lucene/index/BaseStarTreeBuilder.java @@ -127,7 +127,6 @@ public List generateMetricAggregatorInfos(MapperService ma for (Metric metric : this.starTreeField.getMetrics()) { for (MetricStat metricType : metric.getMetrics()) { IndexNumericFieldData.NumericType numericType; - SequentialDocValuesIterator metricStatReader; Mapper fieldMapper = mapperService.documentMapper().mappers().getMapper(metric.getField()); if (fieldMapper instanceof NumberFieldMapper) { numericType = ((NumberFieldMapper) fieldMapper).fieldType().numericType(); diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite90DocValuesFormat.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite90DocValuesFormat.java index 3643eeeff27eb..ed70158997f9a 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite90DocValuesFormat.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite90DocValuesFormat.java @@ -38,10 +38,10 @@ public class Composite90DocValuesFormat extends DocValuesFormat { private final MapperService mapperService; /** Data codec name for Composite Doc Values Format */ - public static final String DATA_CODEC_NAME = "Composite90FormatData"; + public static final String DATA_CODEC_NAME = "Composite99FormatData"; /** Meta codec name for Composite Doc Values Format */ - public static final String META_CODEC_NAME = "Composite90FormatMeta"; + public static final String META_CODEC_NAME = "Composite99FormatMeta"; /** Filename extension for the composite index data */ public static final String DATA_EXTENSION = "sttd"; @@ -49,6 +49,12 @@ public class Composite90DocValuesFormat extends DocValuesFormat { /** Filename extension for the composite index meta */ public static final String META_EXTENSION = "sttm"; + /** Data doc values codec name for Composite Doc Values Format */ + public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData"; + + /** Meta doc values 
codec name for Composite Doc Values Format */ + static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata"; + /** Filename extension for the composite index data doc values */ public static final String DATA_DOC_VALUES_EXTENSION = "sttddvm"; diff --git a/server/src/main/java/org/opensearch/index/codec/composite/Composite90DocValuesReader.java b/server/src/main/java/org/opensearch/index/codec/composite/Composite90DocValuesReader.java index d75350c2a8173..0c9c5d1d91063 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/Composite90DocValuesReader.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/Composite90DocValuesReader.java @@ -12,6 +12,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.lucene90.StarTree99DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; @@ -21,19 +22,34 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IOUtils; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.codec.composite.datacube.startree.StarTreeValues; import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.MergeDimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import 
org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricEntry; +import org.opensearch.index.compositeindex.datacube.startree.meta.StarTreeMetadata; import org.opensearch.index.compositeindex.datacube.startree.node.OffHeapStarTree; import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeConstants; import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; /** * Reader for star tree index and star tree doc values from the segments @@ -48,11 +64,14 @@ public class Composite90DocValuesReader extends DocValuesProducer implements Com private final IndexInput dataIn; private final ChecksumIndexInput metaIn; private final Map starTreeMap = new LinkedHashMap<>(); - private final Map starTreeMetaMap = new LinkedHashMap<>(); + private final Map compositeIndexMetadataMap = new LinkedHashMap<>(); + private final Map compositeDocValuesProducerMap = new LinkedHashMap<>(); private final List compositeFieldInfos = new ArrayList<>(); + private final SegmentReadState readState; public Composite90DocValuesReader(DocValuesProducer producer, SegmentReadState readState) throws IOException { this.delegate = producer; + this.readState = readState; String metaFileName = IndexFileNames.segmentFileName( readState.segmentInfo.name, @@ -78,7 +97,6 @@ public Composite90DocValuesReader(DocValuesProducer producer, SegmentReadState r readState.segmentInfo.getId(), readState.segmentSuffix ); - CodecUtil.retrieveChecksum(dataIn); metaIn = readState.directory.openChecksumInput(metaFileName, readState.context); Throwable priorE = null; @@ -97,22 
+115,40 @@ public Composite90DocValuesReader(DocValuesProducer producer, SegmentReadState r if (magicMarker == -1) { logger.info("EOF reached for composite index metadata"); - return; + break; } else if (magicMarker < 0) { throw new CorruptIndexException("Unknown token encountered: " + magicMarker, metaIn); } CompositeIndexMetadata compositeIndexMetadata = new CompositeIndexMetadata(metaIn, magicMarker); + String compositeFieldName = compositeIndexMetadata.getCompositeFieldName(); compositeFieldInfos.add( - new CompositeIndexFieldInfo( - compositeIndexMetadata.getCompositeFieldName(), - compositeIndexMetadata.getCompositeFieldType() - ) + new CompositeIndexFieldInfo(compositeFieldName, compositeIndexMetadata.getCompositeFieldType()) ); switch (compositeIndexMetadata.getCompositeFieldType()) { case STAR_TREE: - StarTree starTree = new OffHeapStarTree(dataIn, compositeIndexMetadata.getStarTreeMetadata()); - starTreeMap.put(compositeIndexMetadata.getCompositeFieldName(), starTree); - starTreeMetaMap.put(compositeIndexMetadata.getCompositeFieldName(), compositeIndexMetadata); + StarTreeMetadata starTreeMetadata = compositeIndexMetadata.getStarTreeMetadata(); + StarTree starTree = new OffHeapStarTree(dataIn, starTreeMetadata); + starTreeMap.put(compositeFieldName, starTree); + compositeIndexMetadataMap.put(compositeFieldName, compositeIndexMetadata); + + List dimensionFieldNumbers = starTreeMetadata.getDimensionFieldNumbers(); + List dimensions = new ArrayList<>(); + for (Integer fieldNumber : dimensionFieldNumbers) { + dimensions.add(readState.fieldInfos.fieldInfo(fieldNumber)); + } + + StarTree99DocValuesProducer starTreeDocValuesProducer = new StarTree99DocValuesProducer( + readState, + Composite90DocValuesFormat.DATA_DOC_VALUES_CODEC, + Composite90DocValuesFormat.DATA_DOC_VALUES_EXTENSION, + Composite90DocValuesFormat.META_DOC_VALUES_CODEC, + Composite90DocValuesFormat.META_DOC_VALUES_EXTENSION, + dimensions, + starTreeMetadata.getMetricEntries(), + 
compositeFieldName + ); + compositeDocValuesProducerMap.put(compositeFieldName, starTreeDocValuesProducer); + break; default: throw new CorruptIndexException("Invalid composite field type found in the file", dataIn); @@ -123,14 +159,12 @@ public Composite90DocValuesReader(DocValuesProducer producer, SegmentReadState r } finally { CodecUtil.checkFooter(metaIn, priorE); } - CodecUtil.retrieveChecksum(dataIn); success = true; } finally { if (success == false) { IOUtils.closeWhileHandlingException(this); } } - } @Override @@ -169,7 +203,8 @@ public void checkIntegrity() throws IOException { public void close() throws IOException { delegate.close(); starTreeMap.clear(); - starTreeMetaMap.clear(); + compositeIndexMetadataMap.clear(); + /* close the per-field star-tree doc values producers before dropping them */ IOUtils.close(compositeDocValuesProducerMap.values()); compositeDocValuesProducerMap.clear(); } @Override @@ -180,8 +215,72 @@ public List getCompositeIndexFields() { @Override public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo compositeIndexFieldInfo) throws IOException { - // TODO : read compositeIndexValues [starTreeValues] from star tree files - throw new UnsupportedOperationException(); + switch (compositeIndexFieldInfo.getType()) { + case STAR_TREE: + CompositeIndexMetadata compositeIndexMetadata = compositeIndexMetadataMap.get(compositeIndexFieldInfo.getField()); + StarTreeMetadata starTreeMetadata = compositeIndexMetadata.getStarTreeMetadata(); + Set skipStarNodeCreationInDimsFieldNumbers = starTreeMetadata.getSkipStarNodeCreationInDims(); + Set skipStarNodeCreationInDims = new HashSet<>(); + for (Integer fieldNumber : skipStarNodeCreationInDimsFieldNumbers) { + skipStarNodeCreationInDims.add(readState.fieldInfos.fieldInfo(fieldNumber).getName()); + } + + List dimensionFieldNumbers = starTreeMetadata.getDimensionFieldNumbers(); + List dimensions = new ArrayList<>(); + List mergeDimensions = new ArrayList<>(); + for (Integer fieldNumber : dimensionFieldNumbers) { + dimensions.add(readState.fieldInfos.fieldInfo(fieldNumber).getName()); + mergeDimensions.add(new 
MergeDimension(readState.fieldInfos.fieldInfo(fieldNumber).name)); + } + + Map starTreeMetricMap = new ConcurrentHashMap<>(); + for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { + String metricName = metricEntry.getMetricName(); + + Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); + metric.getMetrics().add(metricEntry.getMetricStat()); + } + List starTreeMetrics = new ArrayList<>(starTreeMetricMap.values()); + + StarTreeField starTreeField = new StarTreeField( + compositeIndexMetadata.getCompositeFieldName(), + mergeDimensions, + starTreeMetrics, + new StarTreeFieldConfiguration( + starTreeMetadata.getMaxLeafDocs(), + skipStarNodeCreationInDims, + starTreeMetadata.getStarTreeBuildMode() + ) + ); + StarTreeNode rootNode = starTreeMap.get(compositeIndexFieldInfo.getField()).getRoot(); + StarTree99DocValuesProducer starTree99DocValuesProducer = (StarTree99DocValuesProducer) compositeDocValuesProducerMap.get( + compositeIndexMetadata.getCompositeFieldName() + ); + Map dimensionsDocIdSetIteratorMap = new LinkedHashMap<>(); + Map metricsDocIdSetIteratorMap = new LinkedHashMap<>(); + + for (String dimension : dimensions) { + dimensionsDocIdSetIteratorMap.put( + dimension, + starTree99DocValuesProducer.getSortedNumeric(dimension + StarTreeConstants.DIMENSION_SUFFIX) + ); + } + + for (MetricEntry metricEntry : starTreeMetadata.getMetricEntries()) { + String metricFullName = MetricAggregatorInfo.toFieldName( + compositeIndexFieldInfo.getField(), + metricEntry.getMetricName(), + metricEntry.getMetricStat().getTypeName() + ); + metricsDocIdSetIteratorMap.put(metricFullName, starTree99DocValuesProducer.getSortedNumeric(metricFullName + StarTreeConstants.METRIC_SUFFIX)); /* producer registers metric fields under name + METRIC_SUFFIX; the map key stays unsuffixed */ + } + + return new StarTreeValues(starTreeField, rootNode, dimensionsDocIdSetIteratorMap, metricsDocIdSetIteratorMap); + + default: + throw new CorruptIndexException("Unsupported composite index field type: " + compositeIndexFieldInfo.getType().getName(), dataIn); + } + } } 
diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MergeDimension.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MergeDimension.java new file mode 100644 index 0000000000000..1e15cae2e0029 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MergeDimension.java @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube; + +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.mapper.CompositeDataCubeFieldType; + +import java.io.IOException; +import java.util.Objects; + +/** + * Composite index merge dimension class + * + * @opensearch.experimental + */ +public class MergeDimension implements Dimension { + public static final String MERGE = "merge"; + private final String field; + + public MergeDimension(String field) { + this.field = field; + } + + public String getField() { + return field; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(); + builder.field(CompositeDataCubeFieldType.NAME, field); + builder.field(CompositeDataCubeFieldType.TYPE, MERGE); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MergeDimension dimension = (MergeDimension) o; + return Objects.equals(field, dimension.getField()); + } + + @Override + public int hashCode() { + return Objects.hash(field); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java 
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java index 81dd0a99bc592..a3aa2f2fdde62 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/aggregators/MetricAggregatorInfo.java @@ -80,7 +80,12 @@ public StarTreeNumericType getAggregatedValueType() { * @return field name with metric type and field */ public String toFieldName() { - return starFieldName + DELIMITER + field + DELIMITER + metricStat.getTypeName(); + return toFieldName(starFieldName, field, metricStat.getTypeName()); + + } + + public static String toFieldName(String starFieldName, String field, String typeName) { + return starFieldName + DELIMITER + field + DELIMITER + typeName; } @Override diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java index c2585dc6e79eb..e328cb061e3f1 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/StarTreeMetadata.java @@ -12,11 +12,14 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.store.IndexInput; import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricEntry; import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; /** * Holds the associated metadata for the building of star-tree @@ -28,9 +31,12 @@ public class StarTreeMetadata implements TreeMetadata { private final 
IndexInput meta; private final String starTreeFieldName; private final String starTreeFieldType; - private final List dimensionOrdinals; + private final List dimensionFieldNumbers; private final List metricEntries; private final Integer segmentAggregatedDocCount; + private final Integer maxLeafDocs; + private final Set skipStarNodeCreationInDims; + private final StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode; private final long dataStartFilePointer; private final long dataLength; @@ -39,9 +45,12 @@ public StarTreeMetadata(IndexInput meta, String compositeFieldName, String compo try { this.starTreeFieldName = compositeFieldName; this.starTreeFieldType = compositeFieldType; - this.dimensionOrdinals = readStarTreeDimensions(); + this.dimensionFieldNumbers = readStarTreeDimensions(); this.metricEntries = readMetricEntries(); this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); + this.maxLeafDocs = readMaxLeafDocs(); + this.skipStarNodeCreationInDims = readSkipStarNodeCreationInDims(); + this.starTreeBuildMode = readBuildMode(); this.dataStartFilePointer = readDataStartFilePointer(); this.dataLength = readDataLength(); } catch (Exception e) { @@ -58,13 +67,13 @@ public int readDimensionsCount() throws IOException { @Override public List readStarTreeDimensions() throws IOException { int dimensionCount = readDimensionsCount(); - List dimensionOrdinals = new ArrayList<>(); + List dimensionFieldNumbers = new ArrayList<>(); for (int i = 0; i < dimensionCount; i++) { - dimensionOrdinals.add(meta.readInt()); + dimensionFieldNumbers.add(meta.readInt()); } - return dimensionOrdinals; + return dimensionFieldNumbers; } @Override @@ -91,6 +100,32 @@ public int readSegmentAggregatedDocCount() throws IOException { return meta.readInt(); } + @Override + public int readMaxLeafDocs() throws IOException { + return meta.readInt(); + } + + @Override + public int readSkipStarNodeCreationInDimsCount() throws IOException { + return meta.readInt(); + } + + 
@Override + public Set readSkipStarNodeCreationInDims() throws IOException { + + int skipStarNodeCreationInDimsCount = readSkipStarNodeCreationInDimsCount(); + Set skipStarNodeCreationInDims = new HashSet<>(); + for (int i = 0; i < skipStarNodeCreationInDimsCount; i++) { + skipStarNodeCreationInDims.add(meta.readInt()); + } + return skipStarNodeCreationInDims; + } + + @Override + public StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException { + return StarTreeFieldConfiguration.StarTreeBuildMode.fromTypeName(meta.readString()); + } + @Override public long readDataStartFilePointer() throws IOException { return meta.readLong(); @@ -120,12 +155,12 @@ public String getStarTreeFieldType() { } /** - * Returns the list of dimension ordinals. + * Returns the list of dimension field numbers. * - * @return star-tree dimension ordinals + * @return star-tree dimension field numbers */ - public List getDimensionOrdinals() { - return dimensionOrdinals; + public List getDimensionFieldNumbers() { + return dimensionFieldNumbers; } /** @@ -146,6 +181,33 @@ public Integer getSegmentAggregatedDocCount() { return segmentAggregatedDocCount; } + /** + * Returns the max leaf docs for the star-tree. + * + * @return the max leaf docs. + */ + public Integer getMaxLeafDocs() { + return maxLeafDocs; + } + + /** + * Returns the set of dimensions for which star node will not be created in the star-tree. + * + * @return the set of dimensions. + */ + public Set getSkipStarNodeCreationInDims() { + return skipStarNodeCreationInDims; + } + + /** + * Returns the build mode for the star-tree. + * + * @return the star-tree build mode. + */ + public StarTreeFieldConfiguration.StarTreeBuildMode getStarTreeBuildMode() { + return starTreeBuildMode; + } + /** * Returns the file pointer to the start of the star-tree data. 
* diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java index f9c8db85ee17c..9859afad95a74 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/meta/TreeMetadata.java @@ -8,10 +8,12 @@ package org.opensearch.index.compositeindex.datacube.startree.meta; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricEntry; import java.io.IOException; import java.util.List; +import java.util.Set; /** * An interface for metadata of the star-tree @@ -60,6 +62,38 @@ public interface TreeMetadata { */ int readSegmentAggregatedDocCount() throws IOException; + /** + * Reads the max leaf docs for the star-tree. + * + * @return the max leaf docs for the star-tree + * @throws IOException if an I/O error occurs while reading the max leaf docs + */ + int readMaxLeafDocs() throws IOException; + + /** + * Reads the count of dimensions where star node will not be created in the star-tree. + * + * @return the count of dimensions + * @throws IOException if an I/O error occurs while reading the skip star node dimensions count + */ + int readSkipStarNodeCreationInDimsCount() throws IOException; + + /** + * Reads the list of dimensions field numbers to be skipped for star node creation in the star-tree. + * + * @return the set of dimensions field numbers to be skipped for star node creation. + * @throws IOException if an I/O error occurs while reading the dimensions + */ + Set readSkipStarNodeCreationInDims() throws IOException; + + /** + * Reads the build mode for the star-tree. 
+ * + * @return the star-tree build mode + * @throws IOException if an I/O error occurs while reading the build mode + */ + StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException; + /** * Reads the file pointer to the start of the star-tree data. * diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTreeNode.java index 21da0a8e71d55..91ad56ef70eb3 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/OffHeapStarTreeNode.java @@ -52,6 +52,10 @@ private long getLong(int fieldOffset) throws IOException { return in.readLong(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); } + private byte getByte(int fieldOffset) throws IOException { + return in.readByte(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + @Override public int getDimensionId() throws IOException { return getInt(DIMENSION_ID_OFFSET); @@ -102,7 +106,7 @@ public boolean isLeaf() { @Override public boolean isStarNode() throws IOException { - return getInt(IS_STAR_NODE_OFFSET) != 0; + return getByte(IS_STAR_NODE_OFFSET) != 0; } @Override diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeConstants.java new file mode 100644 index 0000000000000..2670b7b41f4df --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeConstants.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +/** + * This class contains constant values used throughout the Star Tree index implementation. + * + * @opensearch.experimental + */ +public class StarTreeConstants { + + /** + * The suffix appended to dimension field names in the Star Tree index. + */ + public static final String DIMENSION_SUFFIX = "_dim"; + + /** + * The suffix appended to metric field names in the Star Tree index. + */ + public static final String METRIC_SUFFIX = "_metric"; + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataSerializer.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataSerializer.java index b4f8d0955b351..54059458a6d0d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataSerializer.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDataSerializer.java @@ -130,7 +130,7 @@ private static void writeStarTreeNode(IndexOutput output, StarTreeBuilderUtils.T output.writeInt(node.startDocId); output.writeInt(node.endDocId); output.writeInt(node.aggregatedDocId); - output.writeInt(node.isStarNode == false ? 0 : 1); + output.writeByte(node.isStarNode == false ? 
(byte) 0 : (byte) 1); output.writeInt(firstChildId); output.writeInt(lastChildId); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaSerializer.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaSerializer.java index 4241642cf0ba5..dc91c97fb224f 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaSerializer.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeMetaSerializer.java @@ -38,14 +38,14 @@ public class StarTreeMetaSerializer { /** * Serializes the star-tree metadata. * - * @param metaOut the IndexOutput to write the metadata - * @param compositeFieldType the composite field type of the star-tree field - * @param starTreeField the star-tree field - * @param writeState the segment write state - * @param metricAggregatorInfos the list of metric aggregator information + * @param metaOut the IndexOutput to write the metadata + * @param compositeFieldType the composite field type of the star-tree field + * @param starTreeField the star-tree field + * @param writeState the segment write state + * @param metricAggregatorInfos the list of metric aggregator information * @param segmentAggregatedCount the aggregated document count for the segment - * @param dataFilePointer the file pointer to the start of the star tree data - * @param dataFileLength the length of the star tree data file + * @param dataFilePointer the file pointer to the start of the star tree data + * @param dataFileLength the length of the star tree data file * @throws IOException if an I/O error occurs while serializing the metadata */ public static void serializeStarTreeMetadata( @@ -60,14 +60,28 @@ public static void serializeStarTreeMetadata( ) throws IOException { long totalSizeInBytes = 0; - totalSizeInBytes += computeHeaderByteSize(compositeFieldType, starTreeField.getName()); // header 
size - totalSizeInBytes += Integer.BYTES; // number of dimensions - totalSizeInBytes += (long) starTreeField.getDimensionsOrder().size() * Integer.BYTES; // dimension ids - totalSizeInBytes += Integer.BYTES; // metric count - totalSizeInBytes += computeMetricEntriesSizeInBytes(metricAggregatorInfos); // metric - metric stat pair - totalSizeInBytes += Integer.BYTES; // segment aggregated document count - totalSizeInBytes += Long.BYTES; // data start file pointer - totalSizeInBytes += Long.BYTES; // data length + // header size + totalSizeInBytes += computeHeaderByteSize(compositeFieldType, starTreeField.getName()); + // number of dimensions + totalSizeInBytes += Integer.BYTES; + // dimension field numbers + totalSizeInBytes += (long) starTreeField.getDimensionsOrder().size() * Integer.BYTES; + // metric count + totalSizeInBytes += Integer.BYTES; + // metric - metric stat pair + totalSizeInBytes += computeMetricEntriesSizeInBytes(metricAggregatorInfos); + // segment aggregated document count + totalSizeInBytes += Integer.BYTES; + // max leaf docs + totalSizeInBytes += Integer.BYTES; + // skip star node creation dimensions count + totalSizeInBytes += Integer.BYTES; + // skip star node creation dimensions field numbers + totalSizeInBytes += (long) starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims().size() * Integer.BYTES; + // data start file pointer + totalSizeInBytes += Long.BYTES; + // data length + totalSizeInBytes += Long.BYTES; logger.info("Star tree size in bytes : {}", totalSizeInBytes); @@ -142,13 +156,13 @@ private static void writeMetaHeader( /** * Writes the star-tree metadata. 
* - * @param metaOut the IndexOutput to write the metadata - * @param writeState the segment write state - * @param metricAggregatorInfos the list of metric aggregator information - * @param starTreeField the star tree field + * @param metaOut the IndexOutput to write the metadata + * @param writeState the segment write state + * @param metricAggregatorInfos the list of metric aggregator information + * @param starTreeField the star tree field * @param segmentAggregatedDocCount the aggregated document count for the segment - * @param dataFilePointer the file pointer to the start of the star-tree data - * @param dataFileLength the length of the star-tree data file + * @param dataFilePointer the file pointer to the start of the star-tree data + * @param dataFileLength the length of the star-tree data file * @throws IOException if an I/O error occurs while writing the metadata */ private static void writeMeta( @@ -184,6 +198,21 @@ private static void writeMeta( // segment aggregated document count metaOut.writeInt(segmentAggregatedDocCount); + // max leaf docs + metaOut.writeInt(starTreeField.getStarTreeConfig().maxLeafDocs()); + + // number of skip star node creation dimensions + metaOut.writeInt(starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims().size()); + + // skip star node creations + for (String dimension : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { + int dimensionFieldNumber = writeState.fieldInfos.fieldInfo(dimension).getFieldNumber(); + metaOut.writeInt(dimensionFieldNumber); + } + + // star tree build-mode (NOTE(review): this string is not counted in totalSizeInBytes in serializeStarTreeMetadata - confirm) + metaOut.writeString(starTreeField.getStarTreeConfig().getBuildMode().getTypeName()); + // star-tree data file pointer metaOut.writeLong(dataFilePointer);