From 0b41dac0cd53e7bb6fb0f9f4464639345ac8567c Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Fri, 30 Aug 2024 15:13:36 +0530 Subject: [PATCH] Star Tree Meta and Data Writers (#15295) (#15490) --------- Signed-off-by: Sarthak Aggarwal (cherry picked from commit 8629279533ff1a2f2c7bfcd772382d8b50d5cfba) Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .../LuceneDocValuesConsumerFactory.java | 20 +- .../LuceneDocValuesProducerFactory.java | 19 +- .../CompositeIndexConstants.java | 26 ++ .../CompositeIndexMetadata.java | 53 +++ .../compositeindex/datacube/MetricStat.java | 37 +- .../datacube/ReadDimension.java | 55 +++ .../startree/StarTreeFieldConfiguration.java | 22 +- .../startree/builder/BaseStarTreeBuilder.java | 45 +-- .../startree/builder/StarTreeBuilder.java | 2 +- .../startree/fileformats/StarTreeWriter.java | 81 ++++ .../fileformats/data/StarTreeDataWriter.java | 110 ++++++ .../fileformats/data/package-info.java | 14 + .../fileformats/meta/StarTreeMetaWriter.java | 161 ++++++++ .../fileformats/meta/StarTreeMetadata.java | 363 ++++++++++++++++++ .../fileformats/meta/package-info.java | 14 + .../node/FixedLengthStarTreeNode.java | 302 +++++++++++++++ .../fileformats/node/package-info.java | 12 + .../startree/fileformats/package-info.java | 14 + .../InMemoryTreeNode.java} | 19 +- .../startree/node/StarTreeFactory.java | 42 ++ .../datacube/startree/node/StarTreeNode.java | 29 +- .../startree/node/StarTreeNodeType.java | 103 +++++ .../startree/utils/StarTreeUtils.java | 111 ++++++ .../builder/AbstractStarTreeBuilderTests.java | 45 ++- .../data/StarTreeFileFormatsTests.java | 209 ++++++++++ .../meta/StarTreeMetadataTests.java | 224 +++++++++++ .../node/FixedLengthStarTreeNodeTests.java | 233 +++++++++++ .../startree/node/StarTreeNodeTypeTests.java | 58 +++ .../startree/utils/StarTreeUtilsTests.java | 78 ++++ .../index/mapper/StarTreeMapperTests.java | 9 + 
30 files changed, 2418 insertions(+), 92 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/package-info.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/package-info.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/package-info.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/package-info.java rename server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/{utils/TreeNode.java => node/InMemoryTreeNode.java} (76%) create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeFactory.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java create mode 100644 
server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java diff --git a/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java b/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java index 1ed672870337e..4b3f62b6171da 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java @@ -34,17 +34,15 @@ public static DocValuesConsumer getDocValuesConsumerForCompositeCodec( String metaCodec, String metaExtension ) throws IOException { - try ( - Lucene90DocValuesConsumerWrapper lucene90DocValuesConsumerWrapper = new Lucene90DocValuesConsumerWrapper( - state, - dataCodec, - dataExtension, - metaCodec, - metaExtension - ) - ) { - return lucene90DocValuesConsumerWrapper.getLucene90DocValuesConsumer(); - } + Lucene90DocValuesConsumerWrapper lucene90DocValuesConsumerWrapper = new Lucene90DocValuesConsumerWrapper( + state, + dataCodec, + dataExtension, + metaCodec, + metaExtension + ); + return lucene90DocValuesConsumerWrapper.getLucene90DocValuesConsumer(); + } } diff --git 
a/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java b/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java index 611a97ffeb834..d85205d239648 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java @@ -40,17 +40,14 @@ public static DocValuesProducer getDocValuesProducerForCompositeCodec( switch (compositeCodec) { case Composite99Codec.COMPOSITE_INDEX_CODEC_NAME: - try ( - Lucene90DocValuesProducerWrapper lucene90DocValuesProducerWrapper = new Lucene90DocValuesProducerWrapper( - state, - dataCodec, - dataExtension, - metaCodec, - metaExtension - ) - ) { - return lucene90DocValuesProducerWrapper.getLucene90DocValuesProducer(); - } + Lucene90DocValuesProducerWrapper lucene90DocValuesProducerWrapper = new Lucene90DocValuesProducerWrapper( + state, + dataCodec, + dataExtension, + metaCodec, + metaExtension + ); + return lucene90DocValuesProducerWrapper.getLucene90DocValuesProducer(); default: throw new IllegalStateException("Invalid composite codec " + "[" + compositeCodec + "]"); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java new file mode 100644 index 0000000000000..9402675ff39d9 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex; + +/** + * This class contains constants used in the Composite Index implementation. 
+ */ +public class CompositeIndexConstants { + + /** + * The magic marker value used for sanity checks in the Composite Index implementation. + */ + public static final long COMPOSITE_FIELD_MARKER = 0xC0950513F1E1DL; // Composite Field + + /** + * Represents the key to fetch number of non-star aggregated segment documents. + */ + public static final String SEGMENT_DOCS_COUNT = "segmentDocsCount"; + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java new file mode 100644 index 0000000000000..4972c877d4ab8 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex; + +import org.opensearch.index.mapper.CompositeMappedFieldType; + +/** + * This class represents the metadata of a Composite Index, which includes information about + * the composite field name, type, and the specific metadata for the type of composite field + * (e.g., Star Tree metadata). + * + * @opensearch.experimental + */ +public class CompositeIndexMetadata { + + private final String compositeFieldName; + private final CompositeMappedFieldType.CompositeFieldType compositeFieldType; + + /** + * Constructs a CompositeIndexMetadata object with the provided composite field name and type. 
+ * + * @param compositeFieldName the name of the composite field + * @param compositeFieldType the type of the composite field + */ + public CompositeIndexMetadata(String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType) { + this.compositeFieldName = compositeFieldName; + this.compositeFieldType = compositeFieldType; + } + + /** + * Returns the name of the composite field. + * + * @return the composite field name + */ + public String getCompositeFieldName() { + return compositeFieldName; + } + + /** + * Returns the type of the composite field. + * + * @return the composite field type + */ + public CompositeMappedFieldType.CompositeFieldType getCompositeFieldType() { + return compositeFieldType; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java index 1522078024b64..a7b4c96c372d8 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java @@ -20,37 +20,43 @@ */ @ExperimentalApi public enum MetricStat { - VALUE_COUNT("value_count"), - SUM("sum"), - MIN("min"), - MAX("max"), - AVG("avg", VALUE_COUNT, SUM), - DOC_COUNT("doc_count", true); + VALUE_COUNT("value_count", 0), + SUM("sum", 1), + MIN("min", 2), + MAX("max", 3), + AVG("avg", 4, VALUE_COUNT, SUM), + DOC_COUNT("doc_count", true, 5); private final String typeName; private final MetricStat[] baseMetrics; + private final int metricOrdinal; // System field stats cannot be used as input for user metric types private final boolean isSystemFieldStat; - MetricStat(String typeName) { - this(typeName, false); + MetricStat(String typeName, int metricOrdinal) { + this(typeName, false, metricOrdinal); } - MetricStat(String typeName, MetricStat... 
baseMetrics) { - this(typeName, false, baseMetrics); + MetricStat(String typeName, int metricOrdinal, MetricStat... baseMetrics) { + this(typeName, false, metricOrdinal, baseMetrics); } - MetricStat(String typeName, boolean isSystemFieldStat, MetricStat... baseMetrics) { + MetricStat(String typeName, boolean isSystemFieldStat, int metricOrdinal, MetricStat... baseMetrics) { this.typeName = typeName; this.isSystemFieldStat = isSystemFieldStat; this.baseMetrics = baseMetrics; + this.metricOrdinal = metricOrdinal; } public String getTypeName() { return typeName; } + public int getMetricOrdinal() { + return metricOrdinal; + } + /** * Return the list of metrics that this metric is derived from * For example, AVG is derived from COUNT and SUM @@ -76,4 +82,13 @@ public static MetricStat fromTypeName(String typeName) { } throw new IllegalArgumentException("Invalid metric stat: " + typeName); } + + public static MetricStat fromMetricOrdinal(int metricOrdinal) { + for (MetricStat metric : MetricStat.values()) { + if (metric.getMetricOrdinal() == metricOrdinal) { + return metric; + } + } + throw new IllegalArgumentException("Invalid metric stat: " + metricOrdinal); + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java new file mode 100644 index 0000000000000..4264ec87d2c74 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube; + +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.mapper.CompositeDataCubeFieldType; + +import java.io.IOException; +import java.util.Objects; + +/** + * Represents a dimension for reconstructing StarTreeField from file formats during searches and merges. + * + * @opensearch.experimental + */ +public class ReadDimension implements Dimension { + public static final String READ = "read"; + private final String field; + + public ReadDimension(String field) { + this.field = field; + } + + public String getField() { + return field; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(CompositeDataCubeFieldType.NAME, field); + builder.field(CompositeDataCubeFieldType.TYPE, READ); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ReadDimension dimension = (ReadDimension) o; + return Objects.equals(field, dimension.getField()); + } + + @Override + public int hashCode() { + return Objects.hash(field); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java index 755c064c2c60a..d732a8598d711 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java @@ -56,19 +56,25 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @ExperimentalApi public enum StarTreeBuildMode { // TODO : remove onheap support unless this proves useful - ON_HEAP("onheap"), - OFF_HEAP("offheap"); 
+ ON_HEAP("onheap", (byte) 0), + OFF_HEAP("offheap", (byte) 1); private final String typeName; + private final byte buildModeOrdinal; - StarTreeBuildMode(String typeName) { + StarTreeBuildMode(String typeName, byte buildModeOrdinal) { this.typeName = typeName; + this.buildModeOrdinal = buildModeOrdinal; } public String getTypeName() { return typeName; } + public byte getBuildModeOrdinal() { + return buildModeOrdinal; + } + public static StarTreeBuildMode fromTypeName(String typeName) { for (StarTreeBuildMode starTreeBuildMode : StarTreeBuildMode.values()) { if (starTreeBuildMode.getTypeName().equalsIgnoreCase(typeName)) { @@ -77,6 +83,16 @@ public static StarTreeBuildMode fromTypeName(String typeName) { } throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid star tree build mode: [%s] ", typeName)); } + + public static StarTreeBuildMode fromBuildModeOrdinal(byte buildModeOrdinal) { + for (StarTreeBuildMode starTreeBuildMode : StarTreeBuildMode.values()) { + if (starTreeBuildMode.getBuildModeOrdinal() == buildModeOrdinal) { + return starTreeBuildMode; + } + } + throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid star tree build mode: [%s] ", buildModeOrdinal)); + } + } public int maxLeafDocs() { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index ddcf02cc6291a..d3105b4ae23c7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -26,8 +26,9 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import 
org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.Mapper; @@ -46,7 +47,7 @@ import java.util.Objects; import java.util.Set; -import static org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode.ALL; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; /** * Builder for star tree. Defines the algorithm to construct star-tree @@ -72,7 +73,7 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { protected int numStarTreeNodes; protected final int maxLeafDocuments; - protected final TreeNode rootNode = getNewNode(); + protected final InMemoryTreeNode rootNode = getNewNode(); protected final StarTreeField starTreeField; private final SegmentWriteState state; @@ -556,7 +557,7 @@ void build(Iterator starTreeDocumentIterator) throws IOExcepti int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; logger.debug("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); - // TODO: When StarTree Codec is ready + // TODO: When StarTreeFactory Codec is ready // Create doc values indices in disk // Serialize and save in disk // Write star tree metadata for off heap implementation @@ -578,9 +579,9 @@ private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOExcept * * @return return new star-tree node */ - private TreeNode getNewNode() { + private InMemoryTreeNode getNewNode() { numStarTreeNodes++; - 
return new TreeNode(); + return new InMemoryTreeNode(); } /** @@ -591,7 +592,7 @@ private TreeNode getNewNode() { * @param endDocId end document id * @throws IOException throws an exception if we are unable to construct the tree */ - private void constructStarTree(TreeNode node, int startDocId, int endDocId) throws IOException { + private void constructStarTree(InMemoryTreeNode node, int startDocId, int endDocId) throws IOException { int childDimensionId = node.dimensionId + 1; if (childDimensionId == numDimensions) { @@ -600,7 +601,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro // Construct all non-star children nodes node.childDimensionId = childDimensionId; - Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); + Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); node.children = children; // Construct star-node if required @@ -609,7 +610,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro } // Further split on child nodes if required - for (TreeNode child : children.values()) { + for (InMemoryTreeNode child : children.values()) { if (child.endDocId - child.startDocId > maxLeafDocuments) { constructStarTree(child, child.startDocId, child.endDocId); } @@ -625,14 +626,14 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro * @return root node with non-star nodes constructed * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { - Map nodes = new HashMap<>(); + private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { + Map nodes = new HashMap<>(); int nodeStartDocId = startDocId; Long nodeDimensionValue = getDimensionValue(startDocId, dimensionId); for (int i = startDocId + 1; i < endDocId; i++) { Long dimensionValue = 
getDimensionValue(i, dimensionId); if (Objects.equals(dimensionValue, nodeDimensionValue) == false) { - TreeNode child = getNewNode(); + InMemoryTreeNode child = getNewNode(); child.dimensionId = dimensionId; child.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; child.startDocId = nodeStartDocId; @@ -643,7 +644,7 @@ private Map constructNonStarNodes(int startDocId, int endDocId, nodeDimensionValue = dimensionValue; } } - TreeNode lastNode = getNewNode(); + InMemoryTreeNode lastNode = getNewNode(); lastNode.dimensionId = dimensionId; lastNode.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; lastNode.startDocId = nodeStartDocId; @@ -661,11 +662,11 @@ private Map constructNonStarNodes(int startDocId, int endDocId, * @return root node with star nodes constructed * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { - TreeNode starNode = getNewNode(); + private InMemoryTreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { + InMemoryTreeNode starNode = getNewNode(); starNode.dimensionId = dimensionId; starNode.dimensionValue = ALL; - starNode.isStarNode = true; + starNode.nodeType = StarTreeNodeType.STAR.getValue(); starNode.startDocId = numStarTreeDocs; Iterator starTreeDocumentIterator = generateStarTreeDocumentsForStarNode(startDocId, endDocId, dimensionId); while (starTreeDocumentIterator.hasNext()) { @@ -682,7 +683,7 @@ private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId * @return aggregated star-tree documents * @throws IOException throws an exception upon failing to create new aggregated docs based on star tree */ - private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException { + private StarTreeDocument createAggregatedDocs(InMemoryTreeNode node) throws IOException { StarTreeDocument 
aggregatedStarTreeDocument = null; if (node.children == null) { @@ -709,8 +710,8 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException // For non-leaf node if (node.children.containsKey((long) ALL)) { // If it has star child, use the star child aggregated document directly - for (TreeNode child : node.children.values()) { - if (child.isStarNode) { + for (InMemoryTreeNode child : node.children.values()) { + if (child.nodeType == StarTreeNodeType.STAR.getValue()) { aggregatedStarTreeDocument = createAggregatedDocs(child); node.aggregatedDocId = child.aggregatedDocId; } else { @@ -720,12 +721,12 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException } else { // If no star child exists, aggregate all aggregated documents from non-star children if (node.children.values().size() == 1) { - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); node.aggregatedDocId = child.aggregatedDocId; } } else { - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); } if (null == aggregatedStarTreeDocument) { @@ -760,7 +761,7 @@ public void close() throws IOException { abstract Iterator mergeStarTrees(List starTreeValues) throws IOException; - public TreeNode getRootNode() { + public InMemoryTreeNode getRootNode() { return rootNode; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java index 94c9c9f2efb18..357f48c0cc726 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java +++ 
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java @@ -34,7 +34,7 @@ public interface StarTreeBuilder extends Closeable { void build(Map fieldProducerMap) throws IOException; /** - * Builds the star tree using StarTree values from multiple segments + * Builds the star tree using StarTreeFactory values from multiple segments * * @param starTreeValuesSubs contains the star tree values from multiple segments * @throws IOException when we are unable to build star-tree diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java new file mode 100644 index 0000000000000..7f1839024eea7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java @@ -0,0 +1,81 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats; + +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.data.StarTreeDataWriter; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetaWriter; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; + +import java.io.IOException; +import java.util.List; + +/** + * Util class for building star tree + * + * @opensearch.experimental + */ +public class StarTreeWriter { + + /** Initial version for the star tree writer */ + public static final int VERSION_START = 0; + + /** Current version for the star tree writer */ + public static final int VERSION_CURRENT = VERSION_START; + + public StarTreeWriter() {} + + /** + * Write star tree to index output stream + * + * @param dataOut data index output + * @param rootNode root star-tree node + * @param numNodes number of nodes in the star tree + * @param name name of the star-tree field + * @return total size of the star tree in bytes + * @throws IOException when star-tree data serialization fails + */ + public long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { + return StarTreeDataWriter.writeStarTree(dataOut, rootNode, numNodes, name); + } + + /** + * Write star tree metadata to index output stream + * + * @param metaOut meta index output + * @param starTreeField star tree field + * @param metricAggregatorInfos metric aggregator infos + * @param numNodes number of nodes in the star tree + * @param segmentAggregatedCount segment aggregated count + * @param dataFilePointer data file pointer + * @param dataFileLength data file length + * @throws IOException when star-tree metadata serialization fails + */ 
+ public void writeStarTreeMetadata( + IndexOutput metaOut, + StarTreeField starTreeField, + List metricAggregatorInfos, + Integer numNodes, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + StarTreeMetaWriter.writeStarTreeMetadata( + metaOut, + starTreeField, + metricAggregatorInfos, + numNodes, + segmentAggregatedCount, + dataFilePointer, + dataFileLength + ); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java new file mode 100644 index 0000000000000..32feb78a4db3d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.fileformats.data; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + +/** + * Utility class for serializing a star-tree data structure. 
+ * + * @opensearch.experimental + */ +public class StarTreeDataWriter { + + private static final Logger logger = LogManager.getLogger(StarTreeDataWriter.class); + + /** + * Writes the star-tree data structure. + * + * @param indexOutput the IndexOutput to write the star-tree data + * @param rootNode the root node of the star-tree + * @param numNodes the total number of nodes in the star-tree + * @param name the name of the star-tree field + * @return the total size in bytes of the serialized star-tree data + * @throws IOException if an I/O error occurs while writing the star-tree data + */ + public static long writeStarTree(IndexOutput indexOutput, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { + long totalSizeInBytes = (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; + + logger.debug("Star tree data size in bytes : {} for star-tree field {}", totalSizeInBytes, name); + + writeStarTreeNodes(indexOutput, rootNode); + return totalSizeInBytes; + } + + /** + * Writes the star-tree nodes in a breadth-first order. 
+ * + * @param output the IndexOutput to write the nodes + * @param rootNode the root node of the star-tree + * @throws IOException if an I/O error occurs while writing the nodes + */ + private static void writeStarTreeNodes(IndexOutput output, InMemoryTreeNode rootNode) throws IOException { + Queue queue = new LinkedList<>(); + queue.add(rootNode); + + int currentNodeId = 0; + while (!queue.isEmpty()) { + InMemoryTreeNode node = queue.remove(); + + if (node.children == null || node.children.isEmpty()) { + writeStarTreeNode(output, node, ALL, ALL); + } else { + + // Sort all children nodes based on dimension value + List sortedChildren = new ArrayList<>(node.children.values()); + sortedChildren.sort( + Comparator.comparingInt(InMemoryTreeNode::getNodeType).thenComparingLong(InMemoryTreeNode::getDimensionValue) + ); + + int firstChildId = currentNodeId + queue.size() + 1; + int lastChildId = firstChildId + sortedChildren.size() - 1; + writeStarTreeNode(output, node, firstChildId, lastChildId); + + queue.addAll(sortedChildren); + } + + currentNodeId++; + } + } + + /** + * Writes a single star-tree node + * + * @param output the IndexOutput to write the node + * @param node the star tree node to write + * @param firstChildId the ID of the first child node + * @param lastChildId the ID of the last child node + * @throws IOException if an I/O error occurs while writing the node + */ + private static void writeStarTreeNode(IndexOutput output, InMemoryTreeNode node, int firstChildId, int lastChildId) throws IOException { + output.writeInt(node.dimensionId); + output.writeLong(node.dimensionValue); + output.writeInt(node.startDocId); + output.writeInt(node.endDocId); + output.writeInt(node.aggregatedDocId); + output.writeByte(node.nodeType); + output.writeInt(firstChildId); + output.writeInt(lastChildId); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/package-info.java 
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/package-info.java new file mode 100644 index 0000000000000..1c6df3886e08d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Writer package for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.data; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java new file mode 100644 index 0000000000000..2515c1efc3aed --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java @@ -0,0 +1,161 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.List; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; + +/** + * The utility class for serializing the metadata of a star-tree data structure. + * The metadata includes information about the dimensions, metrics, and other relevant details + * related to the star tree. + * + * @opensearch.experimental + */ +public class StarTreeMetaWriter { + + private static final Logger logger = LogManager.getLogger(StarTreeMetaWriter.class); + + /** + * Writes the star-tree metadata. 
+ * + * @param metaOut the IndexOutput to write the metadata + * @param starTreeField the star-tree field + * @param metricAggregatorInfos the list of metric aggregator information + * @param numNodes number of nodes in the star tree + * @param segmentAggregatedCount the aggregated document count for the segment + * @param dataFilePointer the file pointer to the start of the star tree data + * @param dataFileLength the length of the star tree data file + * @throws IOException if an I/O error occurs while serializing the metadata + */ + public static void writeStarTreeMetadata( + IndexOutput metaOut, + StarTreeField starTreeField, + List<MetricAggregatorInfo> metricAggregatorInfos, + Integer numNodes, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + + long initialMetaFilePointer = metaOut.getFilePointer(); + + writeMetaHeader(metaOut); + + // TODO: Replace the parameters with StarTreeMetadata class object + writeMeta(metaOut, metricAggregatorInfos, starTreeField, numNodes, segmentAggregatedCount, dataFilePointer, dataFileLength); + + logger.debug( + "Star tree meta size in bytes : {} for star-tree field {}", + metaOut.getFilePointer() - initialMetaFilePointer, + starTreeField.getName() + ); + } + + /** + * Writes the star-tree metadata header: the composite field marker followed by the current version. + * + * @param metaOut the IndexOutput to write the header + * @throws IOException if an I/O error occurs while writing the header + */ + private static void writeMetaHeader(IndexOutput metaOut) throws IOException { + // magic marker for sanity + metaOut.writeLong(COMPOSITE_FIELD_MARKER); + + // version + metaOut.writeVInt(VERSION_CURRENT); + } + + /** + * Writes the star-tree metadata.
+ * + * @param metaOut the IndexOutput to write the metadata + * @param metricAggregatorInfos the list of metric aggregator information + * @param starTreeField the star tree field + * @param numNodes number of nodes in the star tree + * @param segmentAggregatedDocCount the aggregated document count for the segment + * @param dataFilePointer the file pointer to the start of the star-tree data + * @param dataFileLength the length of the star-tree data file + * @throws IOException if an I/O error occurs while writing the metadata + */ + private static void writeMeta( + IndexOutput metaOut, + List<MetricAggregatorInfo> metricAggregatorInfos, + StarTreeField starTreeField, + int numNodes, + Integer segmentAggregatedDocCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + + // star tree field name + metaOut.writeString(starTreeField.getName()); + + // star tree field type + metaOut.writeString(CompositeMappedFieldType.CompositeFieldType.STAR_TREE.getName()); + + // number of nodes (written as a fixed-width int, unlike the VInt counts that follow) + metaOut.writeInt(numNodes); + + // number of dimensions + // TODO: Revisit the number of dimensions for timestamps (as we will split timestamp into min, hour, etc.) + metaOut.writeVInt(starTreeField.getDimensionsOrder().size()); + + // dimensions + // TODO: Add sub-dimensions for timestamps (as we will split timestamp into min, hour, etc.)
+ for (Dimension dimension : starTreeField.getDimensionsOrder()) { + metaOut.writeString(dimension.getField()); + } + + // number of metrics + metaOut.writeVInt(metricAggregatorInfos.size()); + + // one (metric field name, metric stat ordinal) pair per aggregator + for (MetricAggregatorInfo metricAggregatorInfo : metricAggregatorInfos) { + metaOut.writeString(metricAggregatorInfo.getField()); + int metricStatOrdinal = metricAggregatorInfo.getMetricStat().getMetricOrdinal(); + metaOut.writeVInt(metricStatOrdinal); + } + + // segment aggregated document count + metaOut.writeVInt(segmentAggregatedDocCount); + + // max leaf docs + metaOut.writeVInt(starTreeField.getStarTreeConfig().maxLeafDocs()); + + // number of skip star node creation dimensions + metaOut.writeVInt(starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims().size()); + + // names of the dimensions for which star node creation is skipped + for (String dimension : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { + metaOut.writeString(dimension); + } + + // star tree build-mode ordinal, written as a single byte + metaOut.writeByte(starTreeField.getStarTreeConfig().getBuildMode().getBuildModeOrdinal()); + + // star-tree data file pointer + metaOut.writeVLong(dataFilePointer); + + // star-tree data file length + metaOut.writeVLong(dataFileLength); + + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java new file mode 100644 index 0000000000000..7519c85562a8c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -0,0 +1,363 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.IndexInput; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Holds the associated metadata for the building of star-tree. + * + * @opensearch.experimental + */ +public class StarTreeMetadata extends CompositeIndexMetadata { + private static final Logger logger = LogManager.getLogger(StarTreeMetadata.class); + + /** + * The index input for reading metadata from the segment file. + */ + private final IndexInput meta; + + /** + * The version of the star tree stored in the segments. + */ + private final int version; + + /** + * The number of nodes in the respective star tree + */ + private final int numberOfNodes; + + /** + * The name of the star-tree field, used to identify the star-tree. + */ + private final String starTreeFieldName; + + /** + * The type of the star-tree field, indicating the specific implementation or version. + * Here, STAR_TREE field. + */ + private final String starTreeFieldType; + + /** + * Names of the dimension fields used in the star-tree. + */ + private final List<String> dimensionFields; + + /** + * List of metrics, containing field names and associated metric statistics. + */ + private final List<Metric> metrics; + + /** + * The total number of documents aggregated in this star-tree segment.
+ */ + private final Integer segmentAggregatedDocCount; + + /** + * The maximum number of documents allowed in a leaf node. + */ + private final Integer maxLeafDocs; + + /** + * Names of the dimensions for which star node creation should be skipped. + */ + private final Set<String> skipStarNodeCreationInDims; + + /** + * The build mode used for constructing the star-tree. + */ + private final StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode; + + /** + * The file pointer to the start of the associated star-tree data in the (.cid) file + */ + private final long dataStartFilePointer; + + /** + * The length of the star-tree data in bytes, used for reading the correct amount of data from (.cid) file + */ + private final long dataLength; + + /** + * A star tree metadata constructor to initialize star tree metadata from the segment file (.cim) using index input. + * + * @param metaIn an index input to read star-tree meta; the first value read from it is the number of nodes + * @param compositeFieldName name of the composite field. Here, name of the star-tree field. + * @param compositeFieldType type of the composite field. Here, STAR_TREE field. + * @param version The version of the star tree stored in the segments.
+ * @throws IOException if unable to read star-tree metadata from the file; any failure is rethrown as a CorruptIndexException that keeps the original cause + */ + public StarTreeMetadata( + IndexInput metaIn, + String compositeFieldName, + CompositeMappedFieldType.CompositeFieldType compositeFieldType, + Integer version + ) throws IOException { + super(compositeFieldName, compositeFieldType); + this.meta = metaIn; + try { + this.starTreeFieldName = this.getCompositeFieldName(); + this.starTreeFieldType = this.getCompositeFieldType().getName(); + this.version = version; + this.numberOfNodes = readNumberOfNodes(); + this.dimensionFields = readStarTreeDimensions(); + this.metrics = readMetricEntries(); + this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); + this.maxLeafDocs = readMaxLeafDocs(); + this.skipStarNodeCreationInDims = readSkipStarNodeCreationInDims(); + this.starTreeBuildMode = readBuildMode(); + this.dataStartFilePointer = readDataStartFilePointer(); + this.dataLength = readDataLength(); + } catch (Exception e) { + logger.error("Unable to read star-tree metadata from the file", e); + throw new CorruptIndexException("Unable to read star-tree metadata from the file", metaIn, e); + } + } + + /** + * A star tree metadata constructor to initialize star tree metadata. + * Used for testing. + * + * @param compositeFieldName name of the composite field. Here, name of the star-tree field. + * @param compositeFieldType type of the composite field. Here, STAR_TREE field. + * @param meta an index input to read star-tree meta + * @param version The version of the star tree stored in the segments.
+ * @param dimensionFields list of dimension field names + * @param metrics list of metric entries + * @param segmentAggregatedDocCount segment aggregated doc count + * @param maxLeafDocs max leaf docs + * @param skipStarNodeCreationInDims set of dimensions to skip star node creation + * @param starTreeBuildMode star tree build mode + * @param dataStartFilePointer start file pointer to the associated star tree data in (.cid) file + * @param dataLength length of the corresponding star-tree data in (.cid) file + */ + public StarTreeMetadata( + String compositeFieldName, + CompositeMappedFieldType.CompositeFieldType compositeFieldType, + IndexInput meta, + Integer version, + Integer numberOfNodes, + List<String> dimensionFields, + List<Metric> metrics, + Integer segmentAggregatedDocCount, + Integer maxLeafDocs, + Set<String> skipStarNodeCreationInDims, + StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode, + long dataStartFilePointer, + long dataLength + ) { + super(compositeFieldName, compositeFieldType); + this.meta = meta; + this.starTreeFieldName = compositeFieldName; + this.starTreeFieldType = compositeFieldType.getName(); + this.version = version; + this.numberOfNodes = numberOfNodes; + this.dimensionFields = dimensionFields; + this.metrics = metrics; + this.segmentAggregatedDocCount = segmentAggregatedDocCount; + this.maxLeafDocs = maxLeafDocs; + this.skipStarNodeCreationInDims = skipStarNodeCreationInDims; + this.starTreeBuildMode = starTreeBuildMode; + this.dataStartFilePointer = dataStartFilePointer; + this.dataLength = dataLength; + } + + private int readNumberOfNodes() throws IOException { + return meta.readInt(); + } + + private int readDimensionsCount() throws IOException { + return meta.readVInt(); + } + + private List<String> readStarTreeDimensions() throws IOException { + int dimensionCount = readDimensionsCount(); + List<String> dimensionFields = new ArrayList<>(); + + for (int i = 0; i < dimensionCount; i++) { + dimensionFields.add(meta.readString()); + } + + return dimensionFields;
+ } + + private int readMetricsCount() throws IOException { + return meta.readVInt(); + } + + private List<Metric> readMetricEntries() throws IOException { + int metricCount = readMetricsCount(); + // entries that share a field name are merged into one Metric, kept in first-seen field order + Map<String, Metric> starTreeMetricMap = new LinkedHashMap<>(); + for (int i = 0; i < metricCount; i++) { + String metricName = meta.readString(); + int metricStatOrdinal = meta.readVInt(); + MetricStat metricStat = MetricStat.fromMetricOrdinal(metricStatOrdinal); + Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); + metric.getMetrics().add(metricStat); + } + + return new ArrayList<>(starTreeMetricMap.values()); + } + + private int readSegmentAggregatedDocCount() throws IOException { + return meta.readVInt(); + } + + private int readMaxLeafDocs() throws IOException { + return meta.readVInt(); + } + + private int readSkipStarNodeCreationInDimsCount() throws IOException { + return meta.readVInt(); + } + + private Set<String> readSkipStarNodeCreationInDims() throws IOException { + + int skipStarNodeCreationInDimsCount = readSkipStarNodeCreationInDimsCount(); + Set<String> skipStarNodeCreationInDims = new HashSet<>(); + for (int i = 0; i < skipStarNodeCreationInDimsCount; i++) { + skipStarNodeCreationInDims.add(meta.readString()); + } + return skipStarNodeCreationInDims; + } + + private StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException { + return StarTreeFieldConfiguration.StarTreeBuildMode.fromBuildModeOrdinal(meta.readByte()); + } + + private long readDataStartFilePointer() throws IOException { + return meta.readVLong(); + } + + private long readDataLength() throws IOException { + return meta.readVLong(); + } + + /** + * Returns the name of the star-tree field. + * + * @return star-tree field name + */ + public String getStarTreeFieldName() { + return starTreeFieldName; + } + + /** + * Returns the type of the star tree field.
+ * + * @return star-tree field type + */ + public String getStarTreeFieldType() { + return starTreeFieldType; + } + + /** + * Returns the list of dimension field names. + * + * @return star-tree dimension field names + */ + public List<String> getDimensionFields() { + return dimensionFields; + } + + /** + * Returns the list of metric entries. + * + * @return star-tree metric entries + */ + public List<Metric> getMetrics() { + return metrics; + } + + /** + * Returns the aggregated document count for the star-tree. + * + * @return the aggregated document count for the star-tree. + */ + public Integer getSegmentAggregatedDocCount() { + return segmentAggregatedDocCount; + } + + /** + * Returns the max leaf docs for the star-tree. + * + * @return the max leaf docs. + */ + public Integer getMaxLeafDocs() { + return maxLeafDocs; + } + + /** + * Returns the set of dimension names for which star node will not be created in the star-tree. + * + * @return the set of dimension names. + */ + public Set<String> getSkipStarNodeCreationInDims() { + return skipStarNodeCreationInDims; + } + + /** + * Returns the build mode for the star-tree. + * + * @return the star-tree build mode. + */ + public StarTreeFieldConfiguration.StarTreeBuildMode getStarTreeBuildMode() { + return starTreeBuildMode; + } + + /** + * Returns the file pointer to the start of the star-tree data.
+ * + * @return start file pointer for star-tree data in the (.cid) file + */ + public long getDataStartFilePointer() { + return dataStartFilePointer; + } + + /** + * Returns the length of the star-tree data in bytes. + * + * @return length of the star-tree data in bytes + */ + public long getDataLength() { + return dataLength; + } + + /** + * Returns the version with which the star tree is stored in the segments. + * @return star-tree version + */ + public int getVersion() { + return version; + } + + /** + * Returns the number of nodes in the star tree. + * @return number of nodes in the star tree + */ + public int getNumberOfNodes() { + return numberOfNodes; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/package-info.java new file mode 100644 index 0000000000000..a2480f03c4b5a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +/** + * Meta package for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java new file mode 100644 index 0000000000000..89ac4af51e221 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java @@ -0,0 +1,302 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.node; + +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Iterator; + +/** + * Fixed Length implementation of {@link StarTreeNode}. + *

+ * This class represents a node in a star tree with a fixed-length serialization format. + * It provides efficient storage and retrieval of node information using a RandomAccessInput. + * The node structure includes the methods to access all the constructs of InMemoryTreeNode. + * + *

+ * Key features: + * - Fixed-size serialization for each node, allowing for efficient random access + * - Binary search capability for finding child nodes + * - Support for star nodes, null nodes and other default nodes + * - Iteration over child nodes + *

+ * The class uses specific byte offsets for each field in the serialized format, + * enabling direct access to node properties without parsing the entire node structure. + * + * @opensearch.experimental + */ +public class FixedLengthStarTreeNode implements StarTreeNode { + + /** + * Number of integer fields in the serializable data (dimension id, start/end/aggregated doc ids, first/last child ids) + */ + public static final int NUM_INT_SERIALIZABLE_FIELDS = 6; + + /** + * Number of long fields in the serializable data (the dimension value) + */ + public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1; + + /** + * Number of byte fields in the serializable data (the node type) + */ + public static final int NUM_BYTE_SERIALIZABLE_FIELDS = 1; + + /** + * Total size in bytes of the serializable data for each node (6 ints + 1 long + 1 byte = 33 bytes) + */ + public static final long SERIALIZABLE_DATA_SIZE_IN_BYTES = (Integer.BYTES * NUM_INT_SERIALIZABLE_FIELDS) + (Long.BYTES + * NUM_LONG_SERIALIZABLE_FIELDS) + (NUM_BYTE_SERIALIZABLE_FIELDS * Byte.BYTES); + + // Byte offsets of each field, relative to the start of a node's serialized record + static final int DIMENSION_ID_OFFSET = 0; + static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES; + static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; + static final int END_DOC_ID_OFFSET = START_DOC_ID_OFFSET + Integer.BYTES; + static final int AGGREGATE_DOC_ID_OFFSET = END_DOC_ID_OFFSET + Integer.BYTES; + static final int STAR_NODE_TYPE_OFFSET = AGGREGATE_DOC_ID_OFFSET + Integer.BYTES; + static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Byte.BYTES; + static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; + + /** + * Constant representing an invalid node ID + */ + public static final int INVALID_ID = -1; + + /** + * The ID of this node; its record starts at nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES in the input + */ + private final int nodeId; + + /** + * The ID of the first child of this node, or INVALID_ID when the node is a leaf + */ + private final int firstChildId; + + /** + * The input source for reading node data + */ + RandomAccessInput in; + + /** + * Constructs a FixedLengthStarTreeNode.
+ * + * @param in The RandomAccessInput to read node data from + * @param nodeId The ID of this node; its first-child id is read eagerly from the input + * @throws IOException If there's an error reading from the input + */ + public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOException { + this.in = in; + this.nodeId = nodeId; + firstChildId = getInt(FIRST_CHILD_ID_OFFSET); + } + + /** + * Reads an integer value from the specified offset in the node's data. + * + * @param fieldOffset The byte offset of the field relative to the start of this node's record + * @return The integer value at the specified offset + * @throws IOException If there's an error reading from the input + */ + private int getInt(int fieldOffset) throws IOException { + return in.readInt(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + /** + * Reads a long value from the specified offset in the node's data. + * + * @param fieldOffset The byte offset of the field relative to the start of this node's record + * @return The long value at the specified offset + * @throws IOException If there's an error reading from the input + */ + private long getLong(int fieldOffset) throws IOException { + return in.readLong(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + /** + * Reads a byte value from the specified offset in the node's data.
+ * + * @param fieldOffset The byte offset of the field relative to the start of this node's record + * @return The byte value at the specified offset + * @throws IOException If there's an error reading from the input + */ + private byte getByte(int fieldOffset) throws IOException { + return in.readByte(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + @Override + public int getDimensionId() throws IOException { + return getInt(DIMENSION_ID_OFFSET); + } + + @Override + public long getDimensionValue() throws IOException { + return getLong(DIMENSION_VALUE_OFFSET); + } + + @Override + public int getChildDimensionId() throws IOException { + if (firstChildId == INVALID_ID) { + return INVALID_ID; + } else { + return in.readInt(firstChildId * SERIALIZABLE_DATA_SIZE_IN_BYTES); + } + } + + @Override + public int getStartDocId() throws IOException { + return getInt(START_DOC_ID_OFFSET); + } + + @Override + public int getEndDocId() throws IOException { + return getInt(END_DOC_ID_OFFSET); + } + + @Override + public int getAggregatedDocId() throws IOException { + return getInt(AGGREGATE_DOC_ID_OFFSET); + } + + @Override + public int getNumChildren() throws IOException { + if (firstChildId == INVALID_ID) { + return 0; + } else { + return getInt(LAST_CHILD_ID_OFFSET) - firstChildId + 1; + } + } + + @Override + public boolean isLeaf() { + return firstChildId == INVALID_ID; + } + + @Override + public byte getStarTreeNodeType() throws IOException { + return getByte(STAR_NODE_TYPE_OFFSET); + } + + @Override + public StarTreeNode getChildStarNode() throws IOException { + return isLeaf() ? null : handleStarNode(); + } + + @Override + public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOException { + // there will be no children for leaf nodes + if (isLeaf()) { + return null; + } + + StarTreeNode resultStarTreeNode = null; + if (null != dimensionValue) { + resultStarTreeNode = binarySearchChild(dimensionValue); + assert null != resultStarTreeNode; + } + return resultStarTreeNode; + } + + /** + * Handles the
special case of a star node. + * + * @return The first child if it is a star node, null otherwise + * @throws IOException If there's an error reading from the input + */ + private FixedLengthStarTreeNode handleStarNode() throws IOException { + FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId); + return matchStarTreeNodeTypeOrNull(firstNode, StarTreeNodeType.STAR); + } + + /** + * Checks if the given node matches the specified StarTreeNodeType. + * + * @param firstNode The FixedLengthStarTreeNode to check. + * @param starTreeNodeType The StarTreeNodeType to match against. + * @return The firstNode if its type matches starTreeNodeType, null otherwise. + * @throws IOException If an I/O error occurs during the operation. + */ + private static FixedLengthStarTreeNode matchStarTreeNodeTypeOrNull(FixedLengthStarTreeNode firstNode, StarTreeNodeType starTreeNodeType) + throws IOException { + if (firstNode.getStarTreeNodeType() == starTreeNodeType.getValue()) { + return firstNode; + } else { + return null; + } + } + + /** + * Performs a binary search to find a child node with the given dimension value.
+ * + * @param dimensionValue The dimension value to search for + * @return The child node if found, null otherwise + * @throws IOException If there's an error reading from the input + */ + private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException { + + int low = firstChildId; + int high = getInt(LAST_CHILD_ID_OFFSET); + // if the first child is a star node, increment the low to reduce the search space + if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, firstChildId), StarTreeNodeType.STAR) != null) { + low++; + } + + // if the next child is a null node, increment the low as well; the bound check avoids reading past the last child + if (low <= high && matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, low), StarTreeNodeType.NULL) != null) { + low++; + } + + // binary search over the remaining children in [low, high] + + while (low <= high) { + int mid = low + (high - low) / 2; + FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid); + long midDimensionValue = midNode.getDimensionValue(); + + if (midDimensionValue == dimensionValue) { + return midNode; + } else if (midDimensionValue < dimensionValue) { + low = mid + 1; + } else { + high = mid - 1; + } + } + return null; + } + + @Override + public Iterator getChildrenIterator() throws IOException { + return new Iterator<>() { + private int currentChildId = firstChildId; + private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET); + + @Override + public boolean hasNext() { + return currentChildId != INVALID_ID && currentChildId <= lastChildId; + } + + @Override + public FixedLengthStarTreeNode next() { + try { + return new FixedLengthStarTreeNode(in, currentChildId++); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/package-info.java new file mode
100644 index 0000000000000..84271be81f5e4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Holds classes associated with star tree node with file formats + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.node; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/package-info.java new file mode 100644 index 0000000000000..917327757fc9b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** + * File formats for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java similarity index 76% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java rename to server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java index a5d59a2602633..20f7dcf184391 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java @@ -5,12 +5,14 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.index.compositeindex.datacube.startree.utils; +package org.opensearch.index.compositeindex.datacube.startree.node; import org.opensearch.common.annotation.ExperimentalApi; import java.util.Map; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + /** * /** * Represents a node in a tree data structure, specifically designed for a star-tree implementation. @@ -19,9 +21,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class TreeNode { - - public static final int ALL = -1; +public class InMemoryTreeNode { /** * The dimension id for the dimension (field) associated with this star-tree node. @@ -54,16 +54,21 @@ public class TreeNode { public long dimensionValue = ALL; /** - * A flag indicating whether this node is a star node (a node that represents an aggregation of all dimensions). + * A byte indicating whether the node is star node, null node or default node (with dimension value present). 
*/ - public boolean isStarNode = false; + public byte nodeType = 0; /** * A map containing the child nodes of this star-tree node, keyed by their dimension id. */ - public Map children; + public Map children; public long getDimensionValue() { return dimensionValue; } + + public byte getNodeType() { + return nodeType; + } + } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeFactory.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeFactory.java new file mode 100644 index 0000000000000..79b5947d4f00a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeFactory.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.node.FixedLengthStarTreeNode; + +import java.io.IOException; + +/** + * A factory class for creating off-heap implementations of star-tree nodes. + * + *

This class provides a static factory method to create instances of {@link StarTreeNode} + * from an {@link IndexInput} and {@link StarTreeMetadata}. The implementation uses an + * off-heap data structure to store and access the star-tree data efficiently using random access. + * + * @opensearch.experimental + */ +public class StarTreeFactory { + + /** + * Creates a new instance of {@link StarTreeNode} from the provided {@link IndexInput} and + * {@link StarTreeMetadata}. + * + * @param data The {@link IndexInput} containing the star-tree data. + * @param starTreeMetadata The {@link StarTreeMetadata} containing metadata about the star-tree. + * @return A new instance of {@link StarTreeNode} representing the root of the star-tree. + * @throws IOException If an error occurs while reading the star-tree data. + */ + public static StarTreeNode createStarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { + RandomAccessInput in = data.randomAccessSlice(0, starTreeMetadata.getDataLength()); + return new FixedLengthStarTreeNode(in, 0); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java index 59522ffa4be89..fce3e30e9ebf6 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java @@ -20,7 +20,6 @@ */ @ExperimentalApi public interface StarTreeNode { - long ALL = -1l; /** * Returns the dimension ID of the current star-tree node. @@ -86,21 +85,37 @@ public interface StarTreeNode { boolean isLeaf(); /** - * Checks if the current node is a star node. + * Determines the type of the current node in the Star Tree index structure. 
* - * @return true if the node is a star node, false otherwise - * @throws IOException if an I/O error occurs while reading the star node status + *

<p>The node type can be one of the following: + * <ul> + *
<li>Star Node: Represented by the value -2. + *
<li>Null Node: Represented by the value -1. + *
<li>Default Node: Represented by the value 0. + * </ul>
+ * @see StarTreeNodeType + * + * @return The type of the current node, represented by the corresponding byte value (-2, -1, or 0). + * @throws IOException if an I/O error occurs while reading the node type */ - boolean isStarNode() throws IOException; + byte getStarTreeNodeType() throws IOException; /** - * Returns the child star-tree node for the given dimension value. + * Returns the child node for the given dimension value in the star-tree. * * @param dimensionValue the dimension value * @return the child node for the given dimension value or null if child is not present * @throws IOException if an I/O error occurs while retrieving the child node */ - StarTreeNode getChildForDimensionValue(long dimensionValue) throws IOException; + StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOException; + + /** + * Returns the child star node for a node in the star-tree. + * + * @return the child star node of this node, or null if star child node is not present + * @throws IOException if an I/O error occurs while retrieving the child node + */ + StarTreeNode getChildStarNode() throws IOException; /** * Returns an iterator over the children of the current star-tree node. diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java new file mode 100644 index 0000000000000..4c4725e78ff15 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.node; + +/** + * Represents the different types of nodes in a star-tree data structure. + * + *

<p> + * In order to handle different node types, we use a byte value to represent the node type. + * This enum provides a convenient way to map byte values to their corresponding node types. + * + * <p>

+ * Star and Null Nodes are represented as special cases. Default is the general case. + * Star and null nodes are represented with negative byte values to ensure that they are + * sorted before the default nodes, which are sorted based on their dimension values. + * + * <p>

+ * The node type can be one of the following: + * <ul> + *
<li>Star Node: Represented by the value -2. A star node is a special node that represents + * all possible values for a dimension.</li> + *
<li>Null Node: Represented by the value -1. A null node indicates the absence of any value + * for a dimension.</li> + *
<li>Default Node: Represented by the value 0. A default node represents a node with an + * actual dimension value.</li> + * </ul>
+ * + * By default, we want to consider nodes as default node. + * + * @opensearch.experimental + * @see StarTreeNode + */ +public enum StarTreeNodeType { + + /** + * Represents a star node type. + * + */ + STAR("star", (byte) -2), + + /** + * Represents a null node type. + */ + NULL("null", (byte) -1), + + /** + * Represents a default node type. + */ + DEFAULT("default", (byte) 0); + + private final String name; + private final byte value; + + /** + * Constructs a new StarTreeNodeType with the given name and value. + * + * @param name the name of the node type + * @param value the value associated with the node type + */ + StarTreeNodeType(String name, byte value) { + this.name = name; + this.value = value; + } + + /** + * Returns the name of the node type. + * + * @return the name of the node type + */ + public String getName() { + return name; + } + + /** + * Returns the value associated with the node type. + * + * @return the value associated with the node type + */ + public byte getValue() { + return value; + } + + /** + * Returns the StarTreeNodeType enum constant with the specified value. 
+ * + * @param value the value of the enum constant to return + * @return the enum constant with the specified value; an IllegalStateException is thrown if no such constant exists + */ + public static StarTreeNodeType fromValue(byte value) { + for (StarTreeNodeType nodeType : StarTreeNodeType.values()) { + if (nodeType.getValue() == value) { + return nodeType; + } + } + throw new IllegalStateException("Unrecognized value byte to determine star-tree node type: [" + value + "]"); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java new file mode 100644 index 0000000000000..dc155df4eafca --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; + +import java.util.Collections; +import java.util.List; + +/** + * Util class for building star tree + * + * @opensearch.experimental + */ +public class StarTreeUtils { + + private StarTreeUtils() {} + + public static final int ALL = -1; + + /** + * The suffix appended to dimension field names in the Star Tree index. + */ + public static final String DIMENSION_SUFFIX = "dim"; + + /** + * The suffix appended to metric field names in the Star Tree index. 
+ */ + public static final String METRIC_SUFFIX = "metric"; + + /** + * Returns the full field name for a dimension in the star-tree index. + * + * @param starTreeFieldName star-tree field name + * @param dimensionName name of the dimension + * @return full field name for the dimension in the star-tree index + */ + public static String fullyQualifiedFieldNameForStarTreeDimensionsDocValues(String starTreeFieldName, String dimensionName) { + return starTreeFieldName + "_" + dimensionName + "_" + DIMENSION_SUFFIX; + } + + /** + * Returns the full field name for a metric in the star-tree index. + * + * @param starTreeFieldName star-tree field name + * @param fieldName name of the metric field + * @param metricName name of the metric + * @return full field name for the metric in the star-tree index + */ + public static String fullyQualifiedFieldNameForStarTreeMetricsDocValues(String starTreeFieldName, String fieldName, String metricName) { + return MetricAggregatorInfo.toFieldName(starTreeFieldName, fieldName, metricName) + "_" + METRIC_SUFFIX; + } + + /** + * Get field infos from field names + * + * @param fields field names + * @return field infos + */ + public static FieldInfo[] getFieldInfoList(List fields) { + FieldInfo[] fieldInfoList = new FieldInfo[fields.size()]; + + // field number is not really used. 
We depend on unique field names to get the desired iterator + int fieldNumber = 0; + + for (String fieldName : fields) { + fieldInfoList[fieldNumber] = getFieldInfo(fieldName, fieldNumber); + fieldNumber++; + } + return fieldInfoList; + } + + /** + * Get new field info instance for a given field name and field number + * @param fieldName name of the field + * @param fieldNumber number of the field + * @return new field info instance + */ + public static FieldInfo getFieldInfo(String fieldName, int fieldNumber) { + return new FieldInfo( + fieldName, + fieldNumber, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index e77f184ac0243..33088e8ccbcb3 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -36,8 +36,10 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; +import 
org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.mapper.ContentPath; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.Mapper; @@ -1314,7 +1316,7 @@ public void test_build_starTreeDataset() throws IOException { Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments(); Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); Map> dimValueToDocIdMap = new HashMap<>(); - builder.rootNode.isStarNode = true; + builder.rootNode.nodeType = StarTreeNodeType.STAR.getValue(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); Map> expectedDimToValueMap = getExpectedDimToValueMap(); @@ -2784,13 +2786,13 @@ private static StarTreeField getStarTreeFieldWithDocCount(int maxLeafDocs, boole return sf; } - private void traverseStarTree(TreeNode root, Map> dimValueToDocIdMap, boolean traverStarNodes) { - TreeNode starTree = root; + private void traverseStarTree(InMemoryTreeNode root, Map> dimValueToDocIdMap, boolean traverStarNodes) { + InMemoryTreeNode starTree = root; // Use BFS to traverse the star tree - Queue queue = new ArrayDeque<>(); + Queue queue = new ArrayDeque<>(); queue.add(starTree); int currentDimensionId = -1; - TreeNode starTreeNode; + InMemoryTreeNode starTreeNode; List docIds = new ArrayList<>(); while ((starTreeNode = queue.poll()) != null) { int dimensionId = starTreeNode.dimensionId; @@ -2801,17 +2803,17 @@ private void traverseStarTree(TreeNode root, Map> di // store aggregated document of the node int docId = starTreeNode.aggregatedDocId; Map map = dimValueToDocIdMap.getOrDefault(dimensionId, new HashMap<>()); - if (starTreeNode.isStarNode) { + if (starTreeNode.nodeType == StarTreeNodeType.STAR.getValue()) { map.put(Long.MAX_VALUE, docId); } else { map.put(starTreeNode.dimensionValue, docId); } dimValueToDocIdMap.put(dimensionId, map); - if (starTreeNode.children != null && (!traverStarNodes || starTreeNode.isStarNode)) { - 
Iterator childrenIterator = starTreeNode.children.values().iterator(); + if (starTreeNode.children != null && (!traverStarNodes || starTreeNode.nodeType == StarTreeNodeType.STAR.getValue())) { + Iterator childrenIterator = starTreeNode.children.values().iterator(); while (childrenIterator.hasNext()) { - TreeNode childNode = childrenIterator.next(); + InMemoryTreeNode childNode = childrenIterator.next(); queue.add(childNode); } } @@ -2933,43 +2935,48 @@ public void testMergeFlow() throws IOException { validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } - private void validateStarTree(TreeNode root, int totalDimensions, int maxLeafDocuments, List starTreeDocuments) { + private void validateStarTree( + InMemoryTreeNode root, + int totalDimensions, + int maxLeafDocuments, + List starTreeDocuments + ) { Queue queue = new LinkedList<>(); queue.offer(new Object[] { root, false }); while (!queue.isEmpty()) { Object[] current = queue.poll(); - TreeNode node = (TreeNode) current[0]; + InMemoryTreeNode node = (InMemoryTreeNode) current[0]; boolean currentIsStarNode = (boolean) current[1]; assertNotNull(node); // assert dimensions - if (node.dimensionId != TreeNode.ALL) { + if (node.dimensionId != StarTreeUtils.ALL) { assertTrue(node.dimensionId >= 0 && node.dimensionId < totalDimensions); } if (node.children != null && !node.children.isEmpty()) { assertEquals(node.dimensionId + 1, node.childDimensionId); assertTrue(node.childDimensionId < totalDimensions); - TreeNode starNode = null; + InMemoryTreeNode starNode = null; Object[] nonStarNodeCumulativeMetrics = getMetrics(starTreeDocuments); - for (Map.Entry entry : node.children.entrySet()) { + for (Map.Entry entry : node.children.entrySet()) { Long childDimensionValue = entry.getKey(); - TreeNode child = entry.getValue(); + InMemoryTreeNode child = entry.getValue(); Object[] currMetrics = getMetrics(starTreeDocuments); - if (!child.isStarNode) { + if (child.nodeType != 
StarTreeNodeType.STAR.getValue()) { // Validate dimension values in documents for (int i = child.startDocId; i < child.endDocId; i++) { StarTreeDocument doc = starTreeDocuments.get(i); int j = 0; addMetrics(doc, currMetrics, j); - if (!child.isStarNode) { + if (child.nodeType != StarTreeNodeType.STAR.getValue()) { Long dimension = doc.dimensions[child.dimensionId]; assertEquals(childDimensionValue, dimension); if (dimension != null) { assertEquals(child.dimensionValue, (long) dimension); } else { // TODO : fix this ? - assertEquals(child.dimensionValue, TreeNode.ALL); + assertEquals(child.dimensionValue, StarTreeUtils.ALL); } } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java new file mode 100644 index 0000000000000..4653ac8b08198 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -0,0 +1,209 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.fileformats.data; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeFactory; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Queue; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class StarTreeFileFormatsTests extends OpenSearchTestCase { + + private IndexOutput dataOut; + private IndexInput dataIn; + private Directory directory; + private Integer maxLevels; + private static Integer dimensionValue; + + @Before + public void setup() throws IOException { + directory = newFSDirectory(createTempDir()); + maxLevels = randomIntBetween(2, 5); + dimensionValue = 0; + } + + public void test_StarTreeNode() throws IOException { + + dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); + Map inMemoryTreeNodeMap = new LinkedHashMap<>(); + InMemoryTreeNode root = generateSampleTree(inMemoryTreeNodeMap); + StarTreeWriter starTreeWriter = new StarTreeWriter(); + long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, root, inMemoryTreeNodeMap.size(), "star-tree"); + + // asserting on the actual length 
of the star tree data file + assertEquals(starTreeDataLength, (inMemoryTreeNodeMap.size() * 33L)); + dataOut.close(); + + dataIn = directory.openInput("star-tree-data", IOContext.READONCE); + + StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); + when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); + when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); + + StarTreeNode starTreeNode = StarTreeFactory.createStarTree(dataIn, starTreeMetadata); + Queue queue = new ArrayDeque<>(); + queue.add(starTreeNode); + + while ((starTreeNode = queue.poll()) != null) { + + // verify the star node + assertStarTreeNode(starTreeNode, inMemoryTreeNodeMap.get(starTreeNode.getDimensionValue())); + + Iterator childrenIterator = starTreeNode.getChildrenIterator(); + + if (starTreeNode.getChildDimensionId() != -1) { + while (childrenIterator.hasNext()) { + StarTreeNode child = childrenIterator.next(); + if (child.getStarTreeNodeType() == StarTreeNodeType.DEFAULT.getValue()) { + assertStarTreeNode( + starTreeNode.getChildForDimensionValue(child.getDimensionValue()), + inMemoryTreeNodeMap.get(child.getDimensionValue()) + ); + assertNull(starTreeNode.getChildStarNode()); + } + + queue.add(child); + } + } else { + assertTrue(starTreeNode.isLeaf()); + } + } + + dataIn.close(); + + } + + public void test_starTreeSearch() throws IOException { + + dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); + Map inMemoryTreeNodeMap = new LinkedHashMap<>(); + InMemoryTreeNode root = generateSampleTree(inMemoryTreeNodeMap); + StarTreeWriter starTreeWriter = new StarTreeWriter(); + long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, root, inMemoryTreeNodeMap.size(), "star-tree"); + + // asserting on the actual length of the star tree data file + assertEquals(starTreeDataLength, (inMemoryTreeNodeMap.size() * 33L)); + dataOut.close(); + + dataIn = directory.openInput("star-tree-data", IOContext.READONCE); + + StarTreeMetadata 
starTreeMetadata = mock(StarTreeMetadata.class); + when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); + when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); + + StarTreeNode starTreeNode = StarTreeFactory.createStarTree(dataIn, starTreeMetadata); + InMemoryTreeNode inMemoryTreeNode = inMemoryTreeNodeMap.get(starTreeNode.getDimensionValue()); + assertNotNull(inMemoryTreeNode); + + for (int i = 0; i < maxLevels - 1; i++) { + InMemoryTreeNode randomChildNode = randomFrom(inMemoryTreeNode.children.values()); + StarTreeNode randomStarTreeChildNode = starTreeNode.getChildForDimensionValue(randomChildNode.dimensionValue); + + assertNotNull(randomStarTreeChildNode); + assertStarTreeNode(randomStarTreeChildNode, randomChildNode); + + starTreeNode = randomStarTreeChildNode; + inMemoryTreeNode = randomChildNode; + + } + dataIn.close(); + } + + private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode treeNode) throws IOException { + assertEquals(starTreeNode.getDimensionId(), treeNode.dimensionId); + assertEquals(starTreeNode.getDimensionValue(), treeNode.dimensionValue); + assertEquals(starTreeNode.getStartDocId(), treeNode.startDocId); + assertEquals(starTreeNode.getEndDocId(), treeNode.endDocId); + assertEquals(starTreeNode.getChildDimensionId(), treeNode.childDimensionId); + assertEquals(starTreeNode.getAggregatedDocId(), treeNode.aggregatedDocId); + assertEquals(starTreeNode.getStarTreeNodeType(), treeNode.nodeType); + + if (starTreeNode.getChildDimensionId() != -1) { + assertFalse(starTreeNode.isLeaf()); + if (treeNode.children != null) { + assertEquals(starTreeNode.getNumChildren(), treeNode.children.values().size()); + } + } else { + assertTrue(starTreeNode.isLeaf()); + } + + } + + public InMemoryTreeNode generateSampleTree(Map inMemoryTreeNodeMap) { + // Create the root node + InMemoryTreeNode root = new InMemoryTreeNode(); + root.dimensionId = 0; + root.startDocId = randomInt(); + root.endDocId = randomInt(); + 
root.childDimensionId = 1; + root.aggregatedDocId = randomInt(); + root.nodeType = (byte) 0; + root.children = new HashMap<>(); + + inMemoryTreeNodeMap.put(root.dimensionValue, root); + + // Generate the tree recursively + generateTreeRecursively(root, 1, inMemoryTreeNodeMap); + + return root; + } + + private void generateTreeRecursively(InMemoryTreeNode parent, int currentLevel, Map inMemoryTreeNodeMap) { + if (currentLevel >= this.maxLevels) { + return; // Maximum level reached, stop generating children + } + + int numChildren = randomIntBetween(1, 10); + + for (int i = 0; i < numChildren; i++) { + InMemoryTreeNode child = new InMemoryTreeNode(); + dimensionValue++; + child.dimensionId = currentLevel; + child.dimensionValue = dimensionValue; // Assign a unique dimension value for each child + child.startDocId = randomInt(); + child.endDocId = randomInt(); + child.childDimensionId = (currentLevel == this.maxLevels - 1) ? -1 : (currentLevel + 1); + child.aggregatedDocId = randomInt(); + child.nodeType = (byte) 0; + child.children = new HashMap<>(); + + parent.children.put(child.dimensionValue, child); + inMemoryTreeNodeMap.put(child.dimensionValue, child); + + generateTreeRecursively(child, currentLevel + 1, inMemoryTreeNodeMap); + } + } + + public void tearDown() throws Exception { + super.tearDown(); + dataIn.close(); + dataOut.close(); + directory.close(); + } + +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java new file mode 100644 index 0000000000000..62bd74cc0b3fc --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java @@ -0,0 +1,224 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under 
the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; + +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.Version; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.Metric; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; +import org.opensearch.index.fielddata.IndexNumericFieldData; +import org.opensearch.index.mapper.CompositeMappedFieldType; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Set; +import java.util.UUID; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; 
+import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT;
+import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE;
+
+/**
+ * Writes star-tree metadata with {@link StarTreeWriter#writeStarTreeMetadata} and verifies that
+ * {@link StarTreeMetadata} reads back the same values from the resulting file.
+ */
+public class StarTreeMetadataTests extends OpenSearchTestCase {
+
+    private IndexOutput metaOut;
+    private IndexInput metaIn;
+    private StarTreeField starTreeField;
+    private SegmentWriteState writeState;
+    private Directory directory;
+    private FieldInfo[] fieldsInfo;
+    private List<org.opensearch.index.compositeindex.datacube.Dimension> dimensionsOrder;
+    private List<String> fields = List.of();
+    private List<Metric> metrics;
+    private List<MetricAggregatorInfo> metricAggregatorInfos = new ArrayList<>();
+    private int segmentDocumentCount;
+    private long dataFilePointer;
+    private long dataFileLength;
+
+    /**
+     * Creates a temporary directory plus a segment write state describing ten sorted-numeric fields.
+     *
+     * @throws IOException if the temporary directory cannot be created
+     */
+    @Before
+    public void setup() throws IOException {
+        fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10");
+        directory = newFSDirectory(createTempDir());
+        SegmentInfo segmentInfo = new SegmentInfo(
+            directory,
+            Version.LATEST,
+            Version.LUCENE_9_11_0,
+            "test_segment",
+            6,
+            false,
+            false,
+            new Lucene99Codec(),
+            new HashMap<>(),
+            UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8),
+            new HashMap<>(),
+            null
+        );
+
+        fieldsInfo = new FieldInfo[fields.size()];
+        for (int i = 0; i < fieldsInfo.length; i++) {
+            fieldsInfo[i] = new FieldInfo(
+                fields.get(i),
+                i,
+                false,
+                false,
+                true,
+                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
+                DocValuesType.SORTED_NUMERIC,
+                -1,
+                Collections.emptyMap(),
+                0,
+                0,
+                0,
+                0,
+                VectorEncoding.FLOAT32,
+                VectorSimilarityFunction.EUCLIDEAN,
+                false,
+                false
+            );
+        }
+        FieldInfos fieldInfos = new FieldInfos(fieldsInfo);
+        writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random()));
+    }
+
+    /**
+     * Writes metadata for a star-tree field with four dimensions and three metrics, then reads the
+     * file back and compares every value exposed by {@link StarTreeMetadata} with what was written.
+     *
+     * @throws IOException if writing or reading the metadata file fails
+     */
+    public void test_starTreeMetadata() throws IOException {
+        dimensionsOrder = List.of(
+            new NumericDimension("field1"),
+            new NumericDimension("field3"),
+            new NumericDimension("field5"),
+            new NumericDimension("field8")
+        );
+        metrics = List.of(
+            new Metric("field2", List.of(MetricStat.SUM)),
+            new Metric("field4", List.of(MetricStat.SUM)),
+            new Metric("field6", List.of(MetricStat.VALUE_COUNT))
+        );
+        int maxLeafDocs = randomInt(Integer.MAX_VALUE);
+        StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration(
+            maxLeafDocs,
+            Set.of("field10"),
+            StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP
+        );
+        starTreeField = new StarTreeField("star_tree", dimensionsOrder, metrics, starTreeFieldConfiguration);
+
+        for (Metric metric : metrics) {
+            for (MetricStat metricType : metric.getMetrics()) {
+                MetricAggregatorInfo metricAggregatorInfo = new MetricAggregatorInfo(
+                    metricType,
+                    metric.getField(),
+                    starTreeField.getName(),
+                    IndexNumericFieldData.NumericType.DOUBLE
+                );
+                metricAggregatorInfos.add(metricAggregatorInfo);
+            }
+        }
+
+        dataFileLength = randomNonNegativeLong();
+        dataFilePointer = randomNonNegativeLong();
+        segmentDocumentCount = randomInt(Integer.MAX_VALUE);
+        metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT);
+        StarTreeWriter starTreeWriter = new StarTreeWriter();
+        int numberOfNodes = randomInt(Integer.MAX_VALUE);
+        starTreeWriter.writeStarTreeMetadata(
+            metaOut,
+            starTreeField,
+            metricAggregatorInfos,
+            numberOfNodes,
+            segmentDocumentCount,
+            dataFilePointer,
+            dataFileLength
+        );
+        metaOut.close();
+
+        // reading and asserting the metadata
+        metaIn = directory.openInput("star-tree-metadata", IOContext.READONCE);
+        assertEquals(COMPOSITE_FIELD_MARKER, metaIn.readLong());
+        assertEquals(VERSION_CURRENT, metaIn.readVInt());
+
+        String compositeFieldName = metaIn.readString();
+        CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName(
+            metaIn.readString()
+        );
+
+        StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType, VERSION_CURRENT);
+        assertEquals(starTreeField.getName(), starTreeMetadata.getStarTreeFieldName());
+        assertEquals(starTreeField.getName(), starTreeMetadata.getCompositeFieldName());
+        assertEquals(STAR_TREE, starTreeMetadata.getCompositeFieldType());
+        assertEquals(STAR_TREE.getName(), starTreeMetadata.getStarTreeFieldType());
+        assertEquals(VERSION_CURRENT, starTreeMetadata.getVersion());
+        assertEquals(numberOfNodes, starTreeMetadata.getNumberOfNodes());
+
+        // Compare the sizes first so a truncated or padded dimension list cannot slip through.
+        assertEquals(dimensionsOrder.size(), starTreeMetadata.getDimensionFields().size());
+        for (int i = 0; i < dimensionsOrder.size(); i++) {
+            assertEquals(dimensionsOrder.get(i).getField(), starTreeMetadata.getDimensionFields().get(i));
+        }
+
+        assertEquals(starTreeField.getMetrics().size(), starTreeMetadata.getMetrics().size());
+
+        for (int i = 0; i < starTreeField.getMetrics().size(); i++) {
+
+            Metric expectedMetric = starTreeField.getMetrics().get(i);
+            Metric resultMetric = starTreeMetadata.getMetrics().get(i);
+
+            assertEquals(expectedMetric.getField(), resultMetric.getField());
+            assertEquals(expectedMetric.getMetrics().size(), resultMetric.getMetrics().size());
+
+            for (int j = 0; j < resultMetric.getMetrics().size(); j++) {
+                assertEquals(expectedMetric.getMetrics().get(j), resultMetric.getMetrics().get(j));
+            }
+        }
+        // Exact integer comparison: the three-argument (delta) overload compares as floating point,
+        // which cannot distinguish all large int values.
+        assertEquals(segmentDocumentCount, (int) starTreeMetadata.getSegmentAggregatedDocCount());
+        assertEquals(maxLeafDocs, (int) starTreeMetadata.getMaxLeafDocs());
+        assertEquals(
+            starTreeFieldConfiguration.getSkipStarNodeCreationInDims().size(),
+            starTreeMetadata.getSkipStarNodeCreationInDims().size()
+        );
+        for (String skipStarNodeCreationInDims : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) {
+            assertTrue(starTreeMetadata.getSkipStarNodeCreationInDims().contains(skipStarNodeCreationInDims));
+        }
+        assertEquals(starTreeFieldConfiguration.getBuildMode(), starTreeMetadata.getStarTreeBuildMode());
+        assertEquals(dataFileLength, starTreeMetadata.getDataLength());
+        assertEquals(dataFilePointer, starTreeMetadata.getDataStartFilePointer());
+
+        metaIn.close();
+
+    }
+
+    /**
+     * Closes whatever the test managed to open. Resources are null-checked so that a failure early
+     * in the test is not hidden behind a {@link NullPointerException} raised here.
+     */
+    @Override
+    public void tearDown() throws Exception {
+        try {
+            if (metaOut != null) {
+                metaOut.close();
+            }
+            if (metaIn != null) {
+                metaIn.close();
+            }
+        } finally {
+            try {
+                if (directory != null) {
+                    directory.close();
+                }
+            } finally {
+                super.tearDown();
+            }
+        }
+    }
+
+}
diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java
new file mode 100644
index 0000000000000..6f24728c24f30
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java
@@ -0,0 +1,233 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.compositeindex.datacube.startree.fileformats.node;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter;
+import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
+import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode;
+import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeFactory;
+import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils;
+import org.opensearch.test.OpenSearchTestCase;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Serializes an in-memory star tree with {@link StarTreeWriter#writeStarTree} and checks the
+ * {@link FixedLengthStarTreeNode} view obtained from {@link StarTreeFactory#createStarTree}.
+ */
+public class FixedLengthStarTreeNodeTests extends OpenSearchTestCase {
+
+    private IndexOutput dataOut;
+    private IndexInput dataIn;
+    private Directory directory;
+    InMemoryTreeNode node;
+    InMemoryTreeNode starChild;
+    InMemoryTreeNode nullChild;
+    FixedLengthStarTreeNode starTreeNode;
+
+    /**
+     * Builds a root node with one star child, one null child and a random number of default
+     * children, writes the tree to a data file and opens it as a {@link FixedLengthStarTreeNode}.
+     *
+     * @throws IOException if the data file cannot be written or opened
+     */
+    @Before
+    public void setup() throws IOException {
+        directory = newFSDirectory(createTempDir());
+
+        dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT);
+        StarTreeWriter starTreeWriter = new StarTreeWriter();
+
+        node = new InMemoryTreeNode();
+        node.dimensionId = 0;
+        node.startDocId = randomInt();
+        node.endDocId = randomInt();
+        node.childDimensionId = 1;
+        node.aggregatedDocId = randomInt();
+        // NOTE(review): the child nodes in this file only use -2, -1 and 0 as node types;
+        // (byte) 2 may be a typo for (byte) -2 -- confirm against StarTreeNodeType.
+        node.nodeType = randomFrom((byte) 0, (byte) -1, (byte) 2);
+        node.children = new HashMap<>();
+
+        starChild = new InMemoryTreeNode();
+        starChild.dimensionId = node.dimensionId + 1;
+        starChild.dimensionValue = -1;
+        starChild.startDocId = randomInt();
+        starChild.endDocId = randomInt();
+        starChild.childDimensionId = -1;
+        starChild.aggregatedDocId = randomInt();
+        starChild.nodeType = (byte) -2;
+        starChild.children = new HashMap<>();
+        node.children.put(-1L, starChild);
+
+        nullChild = new InMemoryTreeNode();
+        nullChild.dimensionId = node.dimensionId + 1;
+        nullChild.dimensionValue = -1;
+        nullChild.startDocId = randomInt();
+        nullChild.endDocId = randomInt();
+        nullChild.childDimensionId = -1;
+        nullChild.aggregatedDocId = randomInt();
+        nullChild.nodeType = (byte) -1;
+        nullChild.children = new HashMap<>();
+        node.children.put(null, nullChild);
+
+        // Draw the bound once; evaluating randomIntBetween in the loop condition would pick a
+        // new bound on every iteration.
+        int defaultChildBound = randomIntBetween(2, 5);
+        for (int i = 1; i < defaultChildBound; i++) {
+            InMemoryTreeNode child = new InMemoryTreeNode();
+            child.dimensionId = node.dimensionId + 1;
+            child.dimensionValue = node.dimensionValue + i; // Assign a unique dimension value for each child
+            child.startDocId = randomInt();
+            child.endDocId = randomInt();
+            child.childDimensionId = -1;
+            child.aggregatedDocId = randomInt();
+            child.nodeType = (byte) 0;
+            child.children = new HashMap<>();
+            node.children.put(child.dimensionValue, child);
+        }
+
+        long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, node, 1 + node.children.size(), "star-tree");
+
+        // asserting on the actual length of the star tree data file
+        assertEquals(33L * node.children.size() + 33, starTreeDataLength);
+        dataOut.close();
+
+        dataIn = directory.openInput("star-tree-data", IOContext.READONCE);
+        StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class);
+        when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength);
+        when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L);
+
+        starTreeNode = (FixedLengthStarTreeNode) StarTreeFactory.createStarTree(dataIn, starTreeMetadata);
+
+    }
+
+    public void testOffsets() {
+        assertEquals(0, FixedLengthStarTreeNode.DIMENSION_ID_OFFSET);
+        assertEquals(4, FixedLengthStarTreeNode.DIMENSION_VALUE_OFFSET);
+        assertEquals(12, FixedLengthStarTreeNode.START_DOC_ID_OFFSET);
+        assertEquals(16, FixedLengthStarTreeNode.END_DOC_ID_OFFSET);
+        assertEquals(20, FixedLengthStarTreeNode.AGGREGATE_DOC_ID_OFFSET);
+        assertEquals(24, FixedLengthStarTreeNode.STAR_NODE_TYPE_OFFSET);
+        assertEquals(25, FixedLengthStarTreeNode.FIRST_CHILD_ID_OFFSET);
+        assertEquals(29, FixedLengthStarTreeNode.LAST_CHILD_ID_OFFSET);
+    }
+
+    public void testSerializableDataSize() {
+        assertEquals(33, FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES);
+    }
+
+    public void testGetDimensionId() throws IOException {
+        assertEquals(node.dimensionId, starTreeNode.getDimensionId());
+    }
+
+    public void testGetDimensionValue() throws IOException {
+        assertEquals(node.dimensionValue, starTreeNode.getDimensionValue());
+    }
+
+    public void testGetStartDocId() throws IOException {
+        assertEquals(node.startDocId, starTreeNode.getStartDocId());
+    }
+
+    public void testGetEndDocId() throws IOException {
+        assertEquals(node.endDocId, starTreeNode.getEndDocId());
+    }
+
+    public void testGetAggregatedDocId() throws IOException {
+        assertEquals(node.aggregatedDocId, starTreeNode.getAggregatedDocId());
+    }
+
+    public void testGetNumChildren() throws IOException {
+        assertEquals(node.children.size(), starTreeNode.getNumChildren());
+    }
+
+    public void testIsLeaf() {
+        assertFalse(starTreeNode.isLeaf());
+    }
+
+    public void testGetStarTreeNodeType() throws IOException {
+        assertEquals(node.getNodeType(), starTreeNode.getStarTreeNodeType());
+    }
+
+    public void testGetChildForDimensionValue() throws IOException {
+        // TODO: Add a test to verify children with star node, null node and default node with default dimension value -1
+        // The star and null children account for two map entries, hence "size - 3" as the upper bound.
+        long dimensionValue = randomIntBetween(0, node.children.size() - 3);
+        FixedLengthStarTreeNode childNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue(dimensionValue);
+        assertNotNull(childNode);
+        assertEquals(dimensionValue, childNode.getDimensionValue());
+    }
+
+    public void testGetChildrenIterator() throws IOException {
+        Iterator<FixedLengthStarTreeNode> iterator = starTreeNode.getChildrenIterator();
+        int count = 0;
+        while (iterator.hasNext()) {
+            FixedLengthStarTreeNode child = iterator.next();
+            assertNotNull(child);
+            count++;
+        }
+        assertEquals(starTreeNode.getNumChildren(), count);
+    }
+
+    public void testGetChildForStarNode() throws IOException {
+        // Assuming the first child is a star node in our test data
+        FixedLengthStarTreeNode starNode = (FixedLengthStarTreeNode) starTreeNode.getChildStarNode();
+        assertNotNull(starNode);
+        assertEquals(StarTreeUtils.ALL, starNode.getDimensionValue());
+    }
+
+    public void testGetChildForNullNode() throws IOException {
+        FixedLengthStarTreeNode nullNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue(null);
+        assertNull(nullNode);
+    }
+
+    public void testGetChildForInvalidDimensionValue() throws IOException {
+        long invalidDimensionValue = Long.MAX_VALUE;
+        assertThrows(AssertionError.class, () -> starTreeNode.getChildForDimensionValue(invalidDimensionValue));
+    }
+
+    /**
+     * Writes a tree consisting of a single root node and checks the reader's behaviour when there
+     * are no children. Uses its own directory and streams, closed via try-with-resources so they
+     * are released even when an assertion fails.
+     *
+     * @throws IOException if the data file cannot be written or opened
+     */
+    public void testOnlyRootNodePresent() throws IOException {
+
+        InMemoryTreeNode rootOnly = new InMemoryTreeNode();
+        rootOnly.dimensionId = 0;
+        rootOnly.startDocId = randomInt();
+        rootOnly.endDocId = randomInt();
+        rootOnly.childDimensionId = 1;
+        rootOnly.aggregatedDocId = randomInt();
+        rootOnly.nodeType = randomFrom((byte) 0, (byte) -1, (byte) 2);
+        rootOnly.children = new HashMap<>();
+
+        try (Directory rootOnlyDirectory = newFSDirectory(createTempDir())) {
+            long starTreeDataLength;
+            try (IndexOutput rootOnlyOut = rootOnlyDirectory.createOutput("star-tree-data-1", IOContext.DEFAULT)) {
+                starTreeDataLength = new StarTreeWriter().writeStarTree(rootOnlyOut, rootOnly, 1, "star-tree");
+            }
+
+            // asserting on the actual length of the star tree data file
+            assertEquals(33, starTreeDataLength);
+
+            try (IndexInput rootOnlyIn = rootOnlyDirectory.openInput("star-tree-data-1", IOContext.READONCE)) {
+                StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class);
+                when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength);
+                when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L);
+
+                FixedLengthStarTreeNode rootOnlyNode = (FixedLengthStarTreeNode) StarTreeFactory.createStarTree(
+                    rootOnlyIn,
+                    starTreeMetadata
+                );
+
+                assertEquals(0, rootOnlyNode.getNumChildren());
+                assertNull(rootOnlyNode.getChildForDimensionValue(randomLong()));
+                assertThrows(IllegalArgumentException.class, () -> rootOnlyNode.getChildrenIterator().next());
+                assertThrows(UnsupportedOperationException.class, () -> rootOnlyNode.getChildrenIterator().remove());
+            }
+        }
+    }
+
+    /**
+     * Closes the streams and directory opened in {@link #setup()}. Resources are null-checked so a
+     * failure inside {@code setup()} is not hidden behind a {@link NullPointerException} here.
+     */
+    @Override
+    public void tearDown() throws Exception {
+        try {
+            if (dataIn != null) {
+                dataIn.close();
+            }
+            if (dataOut != null) {
+                dataOut.close();
+            }
+        } finally {
+            try {
+                if (directory != null) {
+                    directory.close();
+                }
+            } finally {
+                super.tearDown();
+            }
+        }
+    }
+}
diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java
new file mode 100644
index 0000000000000..81fb620da5af3
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java
@@ -0,0 +1,58 @@
+/*
+ * 
SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.compositeindex.datacube.startree.node;
+
+import org.opensearch.test.OpenSearchTestCase;
+
+/**
+ * Pins the names, byte values and lookup behaviour of the {@link StarTreeNodeType} constants.
+ */
+public class StarTreeNodeTypeTests extends OpenSearchTestCase {
+
+    public void testStarNodeType() {
+        assertEquals("star", StarTreeNodeType.STAR.getName());
+        assertEquals((byte) -2, StarTreeNodeType.STAR.getValue());
+    }
+
+    public void testNullNodeType() {
+        assertEquals("null", StarTreeNodeType.NULL.getName());
+        assertEquals((byte) -1, StarTreeNodeType.NULL.getValue());
+    }
+
+    public void testDefaultNodeType() {
+        assertEquals("default", StarTreeNodeType.DEFAULT.getName());
+        assertEquals((byte) 0, StarTreeNodeType.DEFAULT.getValue());
+    }
+
+    public void testFromValue() {
+        assertEquals(StarTreeNodeType.STAR, StarTreeNodeType.fromValue((byte) -2));
+        assertEquals(StarTreeNodeType.NULL, StarTreeNodeType.fromValue((byte) -1));
+        assertEquals(StarTreeNodeType.DEFAULT, StarTreeNodeType.fromValue((byte) 0));
+    }
+
+    public void testFromValueInvalid() {
+        IllegalStateException exception = expectThrows(IllegalStateException.class, () -> StarTreeNodeType.fromValue((byte) 1));
+        assertEquals("Unrecognized value byte to determine star-tree node type: [1]", exception.getMessage());
+    }
+
+    public void testEnumValues() {
+        StarTreeNodeType[] values = StarTreeNodeType.values();
+        assertEquals(3, values.length);
+        assertArrayEquals(new StarTreeNodeType[] { StarTreeNodeType.STAR, StarTreeNodeType.NULL, StarTreeNodeType.DEFAULT }, values);
+    }
+
+    public void testEnumValueOf() {
+        assertEquals(StarTreeNodeType.STAR, StarTreeNodeType.valueOf("STAR"));
+        assertEquals(StarTreeNodeType.NULL, StarTreeNodeType.valueOf("NULL"));
+        assertEquals(StarTreeNodeType.DEFAULT, StarTreeNodeType.valueOf("DEFAULT"));
+    }
+
+    public void testEnumValueOfInvalid() {
+        IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> StarTreeNodeType.valueOf("INVALID"));
+        assertTrue(exception.getMessage().contains("No enum constant"));
+    }
+
+}
diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java
new file mode 100644
index 0000000000000..a1d341615969e
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java
@@ -0,0 +1,78 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.compositeindex.datacube.startree.utils;
+
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.VectorEncoding;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+
+/**
+ * Tests for the field-name and {@link FieldInfo} helpers in {@link StarTreeUtils}.
+ */
+public class StarTreeUtilsTests extends OpenSearchTestCase {
+
+    public void testFullyQualifiedFieldNameForStarTreeDimensionsDocValues() {
+        String starTreeFieldName = "myStarTreeField";
+        String dimensionName = "dimension1";
+        String expectedFieldName = "myStarTreeField_dimension1_dim";
+
+        String actualFieldName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues(starTreeFieldName, dimensionName);
+        assertEquals(expectedFieldName, actualFieldName);
+    }
+
+    public void testFullyQualifiedFieldNameForStarTreeMetricsDocValues() {
+        String starTreeFieldName = "myStarTreeField";
+        String fieldName = "myField";
+        String metricName = "metric1";
+        String expectedFieldName = "myStarTreeField_myField_metric1_metric";
+
+        String actualFieldName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues(starTreeFieldName, fieldName, metricName);
+        assertEquals(expectedFieldName, actualFieldName);
+    }
+
+    public void testGetFieldInfoList() {
+        List<String> fieldNames = Arrays.asList("field1", "field2", "field3");
+        FieldInfo[] actualFieldInfos = StarTreeUtils.getFieldInfoList(fieldNames);
+        // Check the length up front so a short or over-long result fails with a clear message.
+        assertEquals(fieldNames.size(), actualFieldInfos.length);
+        for (int i = 0; i < fieldNames.size(); i++) {
+            assertFieldInfos(actualFieldInfos[i], fieldNames.get(i), i);
+        }
+    }
+
+    public void testGetFieldInfo() {
+        String fieldName = UUID.randomUUID().toString();
+        int fieldNumber = randomInt();
+        assertFieldInfos(StarTreeUtils.getFieldInfo(fieldName, fieldNumber), fieldName, fieldNumber);
+
+    }
+
+    /**
+     * Asserts that {@code actualFieldInfo} carries the given name and number together with the
+     * fixed attribute values checked below.
+     *
+     * @param actualFieldInfo field info under test
+     * @param fieldName       expected field name
+     * @param fieldNumber     expected field number
+     */
+    private void assertFieldInfos(FieldInfo actualFieldInfo, String fieldName, int fieldNumber) {
+        assertEquals(fieldName, actualFieldInfo.name);
+        // Exact int comparison; the delta overload would compare as floating point and cannot
+        // tell all large int values apart.
+        assertEquals(fieldNumber, actualFieldInfo.number);
+        assertFalse(actualFieldInfo.hasVectorValues());
+        assertTrue(actualFieldInfo.hasNorms());
+        assertFalse(actualFieldInfo.hasVectors());
+        assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, actualFieldInfo.getIndexOptions());
+        assertEquals(DocValuesType.SORTED_NUMERIC, actualFieldInfo.getDocValuesType());
+        assertEquals(-1, actualFieldInfo.getDocValuesGen());
+        assertEquals(Collections.emptyMap(), actualFieldInfo.attributes());
+        assertEquals(0, actualFieldInfo.getPointDimensionCount());
+        assertEquals(0, actualFieldInfo.getPointIndexDimensionCount());
+        assertEquals(0, actualFieldInfo.getPointNumBytes());
+        assertEquals(0, actualFieldInfo.getVectorDimension());
+        assertEquals(VectorEncoding.FLOAT32, actualFieldInfo.getVectorEncoding());
+        assertEquals(VectorSimilarityFunction.EUCLIDEAN, actualFieldInfo.getVectorSimilarityFunction());
+        assertFalse(actualFieldInfo.isSoftDeletesField());
+    }
+
+}
diff --git 
a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
index 449b251dddca1..81454b210d6be 100644
--- a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java
@@ -21,6 +21,7 @@
 import org.opensearch.index.compositeindex.datacube.Metric;
 import org.opensearch.index.compositeindex.datacube.MetricStat;
 import org.opensearch.index.compositeindex.datacube.NumericDimension;
+import org.opensearch.index.compositeindex.datacube.ReadDimension;
 import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
 import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
 import org.junit.After;
@@ -334,6 +335,14 @@ public void testDimensions() {
         assertNotEquals(n1, n2);
     }
 
+    /** Checks ReadDimension equality for instances built from equal and from different strings. */
+    public void testReadDimensions() {
+        ReadDimension reference = new ReadDimension("name");
+        // Same constructor argument: equal. Different constructor argument: not equal.
+        assertEquals(reference, new ReadDimension("name"));
+        assertNotEquals(reference, new ReadDimension("name1"));
+    }
+
     public void testStarTreeField() {
         List m1 = new ArrayList<>();
         m1.add(MetricStat.MAX);