From 600a53fdaa48bd8ede4f6c63a91499e1cbcd14b7 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Mon, 19 Aug 2024 18:40:22 +0530 Subject: [PATCH 01/11] star tree file formats meta and data writer Signed-off-by: Sarthak Aggarwal --- .../CompositeIndexConstants.java | 26 ++ .../CompositeIndexMetadata.java | 53 ++++ .../compositeindex/datacube/MetricStat.java | 37 ++- .../datacube/ReadDimension.java | 55 ++++ .../startree/StarTreeFieldConfiguration.java | 22 +- .../startree/fileformats/StarTreeWriter.java | 78 +++++ .../fileformats/data/StarTreeDataWriter.java | 142 +++++++++ .../fileformats/data/package-info.java | 14 + .../fileformats/meta/MetricEntry.java | 55 ++++ .../fileformats/meta/StarTreeMetaWriter.java | 156 ++++++++++ .../fileformats/meta/StarTreeMetadata.java | 271 ++++++++++++++++++ .../fileformats/meta/package-info.java | 14 + .../startree/fileformats/package-info.java | 14 + .../node/FixedLengthStarTreeNode.java | 186 ++++++++++++ .../startree/node/InMemoryTreeNode.java | 74 +++++ .../datacube/startree/node/StarTree.java | 65 +++++ .../datacube/startree/node/StarTreeNode.java | 19 +- .../startree/node/StarTreeNodeType.java | 103 +++++++ .../startree/utils/StarTreeUtils.java | 111 +++++++ .../data/StarTreeFileFormatsTests.java | 210 ++++++++++++++ .../fileformats/meta/StarTreeMetaTests.java | 209 ++++++++++++++ .../opensearch/test/OpenSearchTestCase.java | 8 + 22 files changed, 1902 insertions(+), 20 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java create mode 100644 
server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/package-info.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/MetricEntry.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/package-info.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/package-info.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtils.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java new file mode 100644 index 
0000000000000..ffa3dee3578e5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex; + +/** + * This class contains constants used in the Composite Index implementation. + */ +public class CompositeIndexConstants { + + /** + * The magic marker value used for sanity checks in the Composite Index implementation. + */ + public static final long COMPOSITE_FIELD_MARKER = 0xC0950513F1E1DL; // Composite Field + + /** + * Represents the key to fetch number of documents in a segment. + */ + public static final String SEGMENT_DOCS_COUNT = "segmentDocsCount"; + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java new file mode 100644 index 0000000000000..6ba401afe0e6f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex; + +import org.opensearch.index.mapper.CompositeMappedFieldType; + +/** + * This class represents the metadata of a Composite Index, which includes information about + * the composite field name, type, and the specific metadata for the type of composite field + * (e.g., Tree metadata). 
+ * + * @opensearch.experimental + */ +public class CompositeIndexMetadata { + + private final String compositeFieldName; + private final CompositeMappedFieldType.CompositeFieldType compositeFieldType; + + /** + * Constructs a CompositeIndexMetadata object with the provided composite field name and type. + * + * @param compositeFieldName the name of the composite field + * @param compositeFieldType the type of the composite field + */ + public CompositeIndexMetadata(String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType) { + this.compositeFieldName = compositeFieldName; + this.compositeFieldType = compositeFieldType; + } + + /** + * Returns the name of the composite field. + * + * @return the composite field name + */ + public String getCompositeFieldName() { + return compositeFieldName; + } + + /** + * Returns the type of the composite field. + * + * @return the composite field type + */ + public CompositeMappedFieldType.CompositeFieldType getCompositeFieldType() { + return compositeFieldType; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java index 1522078024b64..a7b4c96c372d8 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/MetricStat.java @@ -20,37 +20,43 @@ */ @ExperimentalApi public enum MetricStat { - VALUE_COUNT("value_count"), - SUM("sum"), - MIN("min"), - MAX("max"), - AVG("avg", VALUE_COUNT, SUM), - DOC_COUNT("doc_count", true); + VALUE_COUNT("value_count", 0), + SUM("sum", 1), + MIN("min", 2), + MAX("max", 3), + AVG("avg", 4, VALUE_COUNT, SUM), + DOC_COUNT("doc_count", true, 5); private final String typeName; private final MetricStat[] baseMetrics; + private final int metricOrdinal; // System field stats cannot be used as input for user metric types private final boolean 
isSystemFieldStat; - MetricStat(String typeName) { - this(typeName, false); + MetricStat(String typeName, int metricOrdinal) { + this(typeName, false, metricOrdinal); } - MetricStat(String typeName, MetricStat... baseMetrics) { - this(typeName, false, baseMetrics); + MetricStat(String typeName, int metricOrdinal, MetricStat... baseMetrics) { + this(typeName, false, metricOrdinal, baseMetrics); } - MetricStat(String typeName, boolean isSystemFieldStat, MetricStat... baseMetrics) { + MetricStat(String typeName, boolean isSystemFieldStat, int metricOrdinal, MetricStat... baseMetrics) { this.typeName = typeName; this.isSystemFieldStat = isSystemFieldStat; this.baseMetrics = baseMetrics; + this.metricOrdinal = metricOrdinal; } public String getTypeName() { return typeName; } + public int getMetricOrdinal() { + return metricOrdinal; + } + /** * Return the list of metrics that this metric is derived from * For example, AVG is derived from COUNT and SUM @@ -76,4 +82,13 @@ public static MetricStat fromTypeName(String typeName) { } throw new IllegalArgumentException("Invalid metric stat: " + typeName); } + + public static MetricStat fromMetricOrdinal(int metricOrdinal) { + for (MetricStat metric : MetricStat.values()) { + if (metric.getMetricOrdinal() == metricOrdinal) { + return metric; + } + } + throw new IllegalArgumentException("Invalid metric stat: " + metricOrdinal); + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java new file mode 100644 index 0000000000000..3b6a02967f384 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube; + +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.mapper.CompositeDataCubeFieldType; + +import java.io.IOException; +import java.util.Objects; + +/** + * Composite index merge dimension class + * + * @opensearch.experimental + */ +public class ReadDimension implements Dimension { + public static final String READ = "read"; + private final String field; + + public ReadDimension(String field) { + this.field = field; + } + + public String getField() { + return field; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(CompositeDataCubeFieldType.NAME, field); + builder.field(CompositeDataCubeFieldType.TYPE, READ); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ReadDimension dimension = (ReadDimension) o; + return Objects.equals(field, dimension.getField()); + } + + @Override + public int hashCode() { + return Objects.hash(field); + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java index 755c064c2c60a..d732a8598d711 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/StarTreeFieldConfiguration.java @@ -56,19 +56,25 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @ExperimentalApi public enum StarTreeBuildMode { // TODO : remove onheap support unless this proves useful - ON_HEAP("onheap"), - OFF_HEAP("offheap"); + ON_HEAP("onheap", (byte) 0), + OFF_HEAP("offheap", (byte) 1); 
private final String typeName; + private final byte buildModeOrdinal; - StarTreeBuildMode(String typeName) { + StarTreeBuildMode(String typeName, byte buildModeOrdinal) { this.typeName = typeName; + this.buildModeOrdinal = buildModeOrdinal; } public String getTypeName() { return typeName; } + public byte getBuildModeOrdinal() { + return buildModeOrdinal; + } + public static StarTreeBuildMode fromTypeName(String typeName) { for (StarTreeBuildMode starTreeBuildMode : StarTreeBuildMode.values()) { if (starTreeBuildMode.getTypeName().equalsIgnoreCase(typeName)) { @@ -77,6 +83,16 @@ public static StarTreeBuildMode fromTypeName(String typeName) { } throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid star tree build mode: [%s] ", typeName)); } + + public static StarTreeBuildMode fromBuildModeOrdinal(byte buildModeOrdinal) { + for (StarTreeBuildMode starTreeBuildMode : StarTreeBuildMode.values()) { + if (starTreeBuildMode.getBuildModeOrdinal() == buildModeOrdinal) { + return starTreeBuildMode; + } + } + throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid star tree build mode: [%s] ", buildModeOrdinal)); + } + } public int maxLeafDocs() { diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java new file mode 100644 index 0000000000000..00355fd187f49 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java @@ -0,0 +1,78 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats; + +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.data.StarTreeDataWriter; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetaWriter; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; + +import java.io.IOException; +import java.util.List; + +/** + * Util class for building star tree + * + * @opensearch.experimental + */ +public class StarTreeWriter { + + /** Initial version for the star tree writer */ + public static final int VERSION_START = 0; + + /** Current version for the star tree writer */ + public static final int VERSION_CURRENT = VERSION_START; + + private StarTreeWriter() {} + + /** + * Write star tree to index output stream + * + * @param dataOut data index output + * @param rootNode root star-tree node + * @param numNodes number of nodes in the tree + * @param name name of the star-tree field + * @return total size of the tree + * @throws IOException when star-tree data serialization fails + */ + public static long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { + return StarTreeDataWriter.writeStarTree(dataOut, rootNode, numNodes, name); + } + + /** + * Write star tree metadata to index output stream + * + * @param metaOut meta index output + * @param starTreeField star tree field + * @param metricAggregatorInfos metric aggregator infos + * @param segmentAggregatedCount segment aggregated count + * @param dataFilePointer data file pointer + * @param dataFileLength data file length + * @throws IOException when star-tree data serialization fails + */ + public static void writeStarTreeMetadata( + 
IndexOutput metaOut, + StarTreeField starTreeField, + List metricAggregatorInfos, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + StarTreeMetaWriter.writeStarTreeMetadata( + metaOut, + starTreeField, + metricAggregatorInfos, + segmentAggregatedCount, + dataFilePointer, + dataFileLength + ); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java new file mode 100644 index 0000000000000..5ac47e5927d4f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.fileformats.data; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; +import static org.opensearch.index.compositeindex.datacube.startree.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + +/** + * Utility class for serializing a star-tree data structure. + * + * @opensearch.experimental + */ +public class StarTreeDataWriter { + + private static final Logger logger = LogManager.getLogger(StarTreeDataWriter.class); + + /** + * Writes the star-tree data structure. 
+ * + * @param indexOutput the IndexOutput to write the star-tree data + * @param rootNode the root node of the star-tree + * @param numNodes the total number of nodes in the star-tree + * @param name the name of the star-tree field + * @return the total size in bytes of the serialized star-tree data + * @throws IOException if an I/O error occurs while writing the star-tree data + */ + public static long writeStarTree(IndexOutput indexOutput, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { + long totalSizeInBytes = 0L; + totalSizeInBytes += computeStarTreeDataHeaderByteSize(); + totalSizeInBytes += (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; + + logger.debug("Star tree data size in bytes : {} for star-tree field {}", totalSizeInBytes, name); + + writeStarTreeHeader(indexOutput, numNodes); + writeStarTreeNodes(indexOutput, rootNode); + return totalSizeInBytes; + } + + /** + * Computes the byte size of the star-tree data header. + * + * @return the byte size of the star-tree data header + */ + public static int computeStarTreeDataHeaderByteSize() { + // Magic marker (8), version (4) + int headerSizeInBytes = 12; + + // For number of nodes. + headerSizeInBytes += Integer.BYTES; + return headerSizeInBytes; + } + + /** + * Writes the star-tree data header. + * + * @param output the IndexOutput to write the header + * @param numNodes the total number of nodes in the star-tree + * @throws IOException if an I/O error occurs while writing the header + */ + private static void writeStarTreeHeader(IndexOutput output, int numNodes) throws IOException { + output.writeLong(COMPOSITE_FIELD_MARKER); + output.writeInt(VERSION_CURRENT); + output.writeInt(numNodes); + } + + /** + * Writes the star-tree nodes in a breadth-first order. 
+ * + * @param output the IndexOutput to write the nodes + * @param rootNode the root node of the star-tree + * @throws IOException if an I/O error occurs while writing the nodes + */ + private static void writeStarTreeNodes(IndexOutput output, InMemoryTreeNode rootNode) throws IOException { + Queue<InMemoryTreeNode> queue = new LinkedList<>(); + queue.add(rootNode); + + int currentNodeId = 0; + while (!queue.isEmpty()) { + InMemoryTreeNode node = queue.remove(); + + if (node.children == null || node.children.isEmpty()) { + writeStarTreeNode(output, node, ALL, ALL); + } else { + + // Sort children by node type first, then by dimension value + List<InMemoryTreeNode> sortedChildren = new ArrayList<>(node.children.values()); + sortedChildren.sort( + Comparator.comparingInt(InMemoryTreeNode::getNodeType).thenComparingLong(InMemoryTreeNode::getDimensionValue) + ); + + int firstChildId = currentNodeId + queue.size() + 1; + int lastChildId = firstChildId + sortedChildren.size() - 1; + writeStarTreeNode(output, node, firstChildId, lastChildId); + + queue.addAll(sortedChildren); + } + + currentNodeId++; + } + } + + /** + * Writes a single star-tree node + * + * @param output the IndexOutput to write the node + * @param node the star tree node to write + * @param firstChildId the ID of the first child node + * @param lastChildId the ID of the last child node + * @throws IOException if an I/O error occurs while writing the node + */ + private static void writeStarTreeNode(IndexOutput output, InMemoryTreeNode node, int firstChildId, int lastChildId) throws IOException { + output.writeInt(node.dimensionId); + output.writeLong(node.dimensionValue); + output.writeInt(node.startDocId); + output.writeInt(node.endDocId); + output.writeInt(node.aggregatedDocId); + output.writeByte(node.nodeType); + output.writeInt(firstChildId); + output.writeInt(lastChildId); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/package-info.java 
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/package-info.java new file mode 100644 index 0000000000000..1c6df3886e08d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Writer package for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.data; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/MetricEntry.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/MetricEntry.java new file mode 100644 index 0000000000000..357c8a49f600c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/MetricEntry.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; + +import org.opensearch.index.compositeindex.datacube.MetricStat; + +import java.util.Objects; + +/** + * Holds the pair of metric name and its associated stat + * + * @opensearch.experimental + */ +public class MetricEntry { + + private final String metricFieldName; + private final MetricStat metricStat; + + public MetricEntry(String metricFieldName, MetricStat metricStat) { + this.metricFieldName = metricFieldName; + this.metricStat = metricStat; + } + + public String getMetricFieldName() { + return metricFieldName; + } + + public MetricStat getMetricStat() { + return metricStat; + } + + @Override + public int hashCode() { + return Objects.hashCode(metricFieldName + metricStat.getTypeName()); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj instanceof MetricEntry) { + MetricEntry anotherPair = (MetricEntry) obj; + return metricStat.equals(anotherPair.metricStat) && metricFieldName.equals(anotherPair.metricFieldName); + } + return false; + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java new file mode 100644 index 0000000000000..057d5e908d9d6 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java @@ -0,0 +1,156 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; +import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.List; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; + +/** + * The utility class for serializing the metadata of a star-tree data structure. + * The metadata includes information about the dimensions, metrics, and other relevant details + * related to the star tree. + * + * @opensearch.experimental + */ +public class StarTreeMetaWriter { + + private static final Logger logger = LogManager.getLogger(StarTreeMetaWriter.class); + + /** + * Writes the star-tree metadata. 
+ * + * @param metaOut the IndexOutput to write the metadata + * @param starTreeField the star-tree field + * @param metricAggregatorInfos the list of metric aggregator information + * @param segmentAggregatedCount the aggregated document count for the segment + * @param dataFilePointer the file pointer to the start of the star tree data + * @param dataFileLength the length of the star tree data file + * @throws IOException if an I/O error occurs while serializing the metadata + */ + public static void writeStarTreeMetadata( + IndexOutput metaOut, + StarTreeField starTreeField, + List metricAggregatorInfos, + Integer segmentAggregatedCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + + long initialMetaFilePointer = metaOut.getFilePointer(); + + writeMetaHeader(metaOut, CompositeMappedFieldType.CompositeFieldType.STAR_TREE, starTreeField.getName()); + writeMeta(metaOut, metricAggregatorInfos, starTreeField, segmentAggregatedCount, dataFilePointer, dataFileLength); + + logger.debug( + "Star tree meta size in bytes : {} for star-tree field {}", + metaOut.getFilePointer() - initialMetaFilePointer, + starTreeField.getName() + ); + } + + /** + * Writes the star-tree metadata header. 
+ * + * @param metaOut the IndexOutput to write the header + * @param compositeFieldType the composite field type of the star-tree field + * @param starTreeFieldName the name of the star-tree field + * @throws IOException if an I/O error occurs while writing the header + */ + private static void writeMetaHeader( + IndexOutput metaOut, + CompositeMappedFieldType.CompositeFieldType compositeFieldType, + String starTreeFieldName + ) throws IOException { + // magic marker for sanity + metaOut.writeLong(COMPOSITE_FIELD_MARKER); + + // version + metaOut.writeVInt(VERSION_CURRENT); + + // star tree field name + metaOut.writeString(starTreeFieldName); + + // star tree field type + metaOut.writeString(compositeFieldType.getName()); + } + + /** + * Writes the star-tree metadata. + * + * @param metaOut the IndexOutput to write the metadata + * @param metricAggregatorInfos the list of metric aggregator information + * @param starTreeField the star tree field + * @param segmentAggregatedDocCount the aggregated document count for the segment + * @param dataFilePointer the file pointer to the start of the star-tree data + * @param dataFileLength the length of the star-tree data file + * @throws IOException if an I/O error occurs while writing the metadata + */ + private static void writeMeta( + IndexOutput metaOut, + List metricAggregatorInfos, + StarTreeField starTreeField, + Integer segmentAggregatedDocCount, + long dataFilePointer, + long dataFileLength + ) throws IOException { + + // number of dimensions + metaOut.writeVInt(starTreeField.getDimensionsOrder().size()); + + // dimensions + for (Dimension dimension : starTreeField.getDimensionsOrder()) { + metaOut.writeString(dimension.getField()); + } + + // number of metrics + metaOut.writeVInt(metricAggregatorInfos.size()); + + // metric - metric stat pair + for (MetricAggregatorInfo metricAggregatorInfo : metricAggregatorInfos) { + metaOut.writeString(metricAggregatorInfo.getField()); + int metricStatOrdinal = 
metricAggregatorInfo.getMetricStat().getMetricOrdinal(); + metaOut.writeVInt(metricStatOrdinal); + } + + // segment aggregated document count + metaOut.writeVInt(segmentAggregatedDocCount); + + // max leaf docs + metaOut.writeVInt(starTreeField.getStarTreeConfig().maxLeafDocs()); + + // number of skip star node creation dimensions + metaOut.writeVInt(starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims().size()); + + // skip star node creations + for (String dimension : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) { + metaOut.writeString(dimension); + } + + // star tree build-mode + metaOut.writeByte(starTreeField.getStarTreeConfig().getBuildMode().getBuildModeOrdinal()); + + // star-tree data file pointer + metaOut.writeVLong(dataFilePointer); + + // star-tree data file length + metaOut.writeVLong(dataFileLength); + + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java new file mode 100644 index 0000000000000..b1359d4ce8d57 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -0,0 +1,271 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.IndexInput; +import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.mapper.CompositeMappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Holds the associated metadata for the building of star-tree. + * + * @opensearch.experimental + */ +public class StarTreeMetadata extends CompositeIndexMetadata { + private static final Logger logger = LogManager.getLogger(StarTreeMetadata.class); + private final IndexInput meta; + private final String starTreeFieldName; + private final String starTreeFieldType; + private final List dimensionFields; + private final List metricEntries; + private final Integer segmentAggregatedDocCount; + private final Integer maxLeafDocs; + private final Set skipStarNodeCreationInDims; + private final StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode; + private final long dataStartFilePointer; + private final long dataLength; + + /** + * A star tree metadata constructor to initialize star tree metadata from the segment file (.cim) using index input. + * + * @param metaIn an index input to read star-tree meta + * @param compositeFieldName name of the composite field. Here, name of the star-tree field. + * @param compositeFieldType type of the composite field. Here, STAR_TREE field. 
+ * @throws IOException if unable to read star-tree metadata from the file + */ + public StarTreeMetadata(IndexInput metaIn, String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType) + throws IOException { + super(compositeFieldName, compositeFieldType); + this.meta = metaIn; + try { + this.starTreeFieldName = this.getCompositeFieldName(); + this.starTreeFieldType = this.getCompositeFieldType().getName(); + this.dimensionFields = readStarTreeDimensions(); + this.metricEntries = readMetricEntries(); + this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); + this.maxLeafDocs = readMaxLeafDocs(); + this.skipStarNodeCreationInDims = readSkipStarNodeCreationInDims(); + this.starTreeBuildMode = readBuildMode(); + this.dataStartFilePointer = readDataStartFilePointer(); + this.dataLength = readDataLength(); + } catch (Exception e) { + logger.error("Unable to read star-tree metadata from the file"); + throw new CorruptIndexException("Unable to read star-tree metadata from the file", metaIn); + } + } + + /** + * A star tree metadata constructor to initialize star tree metadata. + * Used for testing. + * + * @param meta an index input to read star-tree meta + * @param compositeFieldName name of the composite field. Here, name of the star-tree field. + * @param compositeFieldType type of the composite field. Here, STAR_TREE field. 
+ * @param dimensionFields list of dimension fields + * @param metricEntries list of metric entries + * @param segmentAggregatedDocCount segment aggregated doc count + * @param maxLeafDocs max leaf docs + * @param skipStarNodeCreationInDims set of dimensions to skip star node creation + * @param starTreeBuildMode star tree build mode + * @param dataStartFilePointer data start file pointer + * @param dataLength data length + */ + public StarTreeMetadata( + String compositeFieldName, + CompositeMappedFieldType.CompositeFieldType compositeFieldType, + IndexInput meta, + List dimensionFields, + List metricEntries, + Integer segmentAggregatedDocCount, + Integer maxLeafDocs, + Set skipStarNodeCreationInDims, + StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode, + long dataStartFilePointer, + long dataLength + ) { + super(compositeFieldName, compositeFieldType); + this.meta = meta; + this.starTreeFieldName = compositeFieldName; + this.starTreeFieldType = compositeFieldType.getName(); + this.dimensionFields = dimensionFields; + this.metricEntries = metricEntries; + this.segmentAggregatedDocCount = segmentAggregatedDocCount; + this.maxLeafDocs = maxLeafDocs; + this.skipStarNodeCreationInDims = skipStarNodeCreationInDims; + this.starTreeBuildMode = starTreeBuildMode; + this.dataStartFilePointer = dataStartFilePointer; + this.dataLength = dataLength; + } + + private int readDimensionsCount() throws IOException { + return meta.readVInt(); + } + + private List readStarTreeDimensions() throws IOException { + int dimensionCount = readDimensionsCount(); + List dimensionFields = new ArrayList<>(); + + for (int i = 0; i < dimensionCount; i++) { + dimensionFields.add(meta.readString()); + } + + return dimensionFields; + } + + private int readMetricsCount() throws IOException { + return meta.readVInt(); + } + + private List readMetricEntries() throws IOException { + int metricCount = readMetricsCount(); + List metricEntries = new ArrayList<>(); + + for (int i = 0; i < 
metricCount; i++) { + String metricFieldName = meta.readString(); + int metricStatOrdinal = meta.readVInt(); + metricEntries.add(new MetricEntry(metricFieldName, MetricStat.fromMetricOrdinal(metricStatOrdinal))); + } + + return metricEntries; + } + + private int readSegmentAggregatedDocCount() throws IOException { + return meta.readVInt(); + } + + private int readMaxLeafDocs() throws IOException { + return meta.readVInt(); + } + + private int readSkipStarNodeCreationInDimsCount() throws IOException { + return meta.readVInt(); + } + + private Set readSkipStarNodeCreationInDims() throws IOException { + + int skipStarNodeCreationInDimsCount = readSkipStarNodeCreationInDimsCount(); + Set skipStarNodeCreationInDims = new HashSet<>(); + for (int i = 0; i < skipStarNodeCreationInDimsCount; i++) { + skipStarNodeCreationInDims.add(meta.readString()); + } + return skipStarNodeCreationInDims; + } + + private StarTreeFieldConfiguration.StarTreeBuildMode readBuildMode() throws IOException { + return StarTreeFieldConfiguration.StarTreeBuildMode.fromBuildModeOrdinal(meta.readByte()); + } + + private long readDataStartFilePointer() throws IOException { + return meta.readVLong(); + } + + private long readDataLength() throws IOException { + return meta.readVLong(); + } + + /** + * Returns the name of the star-tree field. + * + * @return star-tree field name + */ + public String getStarTreeFieldName() { + return starTreeFieldName; + } + + /** + * Returns the type of the star tree field. + * + * @return star-tree field type + */ + public String getStarTreeFieldType() { + return starTreeFieldType; + } + + /** + * Returns the list of dimension field numbers. + * + * @return star-tree dimension field numbers + */ + public List getDimensionFields() { + return dimensionFields; + } + + /** + * Returns the list of metric entries. 
+ * + * @return star-tree metric entries + */ + public List getMetricEntries() { + return metricEntries; + } + + /** + * Returns the aggregated document count for the star-tree. + * + * @return the aggregated document count for the star-tree. + */ + public Integer getSegmentAggregatedDocCount() { + return segmentAggregatedDocCount; + } + + /** + * Returns the max leaf docs for the star-tree. + * + * @return the max leaf docs. + */ + public Integer getMaxLeafDocs() { + return maxLeafDocs; + } + + /** + * Returns the set of dimensions for which star node will not be created in the star-tree. + * + * @return the set of dimensions. + */ + public Set getSkipStarNodeCreationInDims() { + return skipStarNodeCreationInDims; + } + + /** + * Returns the build mode for the star-tree. + * + * @return the star-tree build mode. + */ + public StarTreeFieldConfiguration.StarTreeBuildMode getStarTreeBuildMode() { + return starTreeBuildMode; + } + + /** + * Returns the file pointer to the start of the star-tree data. + * + * @return start file pointer for star-tree data + */ + public long getDataStartFilePointer() { + return dataStartFilePointer; + } + + /** + * Returns the length of star-tree data + * + * @return star-tree length + */ + public long getDataLength() { + return dataLength; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/package-info.java new file mode 100644 index 0000000000000..a2480f03c4b5a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** + * Meta package for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/package-info.java new file mode 100644 index 0000000000000..917327757fc9b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/package-info.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * File formats for star tree + * + * @opensearch.experimental + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java new file mode 100644 index 0000000000000..8159d2039121c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -0,0 +1,186 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.lucene.store.RandomAccessInput; + +import java.io.IOException; +import java.util.Iterator; + +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + +/** + * Fixed Length implementation of {@link StarTreeNode} + * + * @opensearch.experimental + */ +public class FixedLengthStarTreeNode implements StarTreeNode { + public static final int NUM_INT_SERIALIZABLE_FIELDS = 6; + public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1; + public static final int NUM_BYTE_SERIALIZABLE_FIELDS = 1; + public static final long SERIALIZABLE_DATA_SIZE_IN_BYTES = (Integer.BYTES * NUM_INT_SERIALIZABLE_FIELDS) + (Long.BYTES + * NUM_LONG_SERIALIZABLE_FIELDS) + (NUM_BYTE_SERIALIZABLE_FIELDS * Byte.BYTES); + private static final int DIMENSION_ID_OFFSET = 0; + private static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES; + private static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; + private static final int END_DOC_ID_OFFSET = START_DOC_ID_OFFSET + Integer.BYTES; + private static final int AGGREGATE_DOC_ID_OFFSET = END_DOC_ID_OFFSET + Integer.BYTES; + private static final int STAR_NODE_TYPE_OFFSET = AGGREGATE_DOC_ID_OFFSET + Integer.BYTES; + private static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Byte.BYTES; + private static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; + + public static final int INVALID_ID = -1; + + private final int nodeId; + private final int firstChildId; + + RandomAccessInput in; + + public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOException { + this.in = in; + this.nodeId = nodeId; + firstChildId = getInt(FIRST_CHILD_ID_OFFSET); + } + + private int getInt(int fieldOffset) throws IOException { + return in.readInt(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + private long getLong(int fieldOffset) 
throws IOException { + return in.readLong(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + private byte getByte(int fieldOffset) throws IOException { + return in.readByte(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); + } + + @Override + public int getDimensionId() throws IOException { + return getInt(DIMENSION_ID_OFFSET); + } + + @Override + public long getDimensionValue() throws IOException { + return getLong(DIMENSION_VALUE_OFFSET); + } + + @Override + public int getChildDimensionId() throws IOException { + if (firstChildId == INVALID_ID) { + return INVALID_ID; + } else { + return in.readInt(firstChildId * SERIALIZABLE_DATA_SIZE_IN_BYTES); + } + } + + @Override + public int getStartDocId() throws IOException { + return getInt(START_DOC_ID_OFFSET); + } + + @Override + public int getEndDocId() throws IOException { + return getInt(END_DOC_ID_OFFSET); + } + + @Override + public int getAggregatedDocId() throws IOException { + return getInt(AGGREGATE_DOC_ID_OFFSET); + } + + @Override + public int getNumChildren() throws IOException { + if (firstChildId == INVALID_ID) { + return 0; + } else { + return getInt(LAST_CHILD_ID_OFFSET) - firstChildId + 1; + } + } + + @Override + public boolean isLeaf() { + return firstChildId == INVALID_ID; + } + + @Override + public byte getStarTreeNodeType() throws IOException { + return getByte(STAR_NODE_TYPE_OFFSET); + } + + @Override + public StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isStar) throws IOException { + // there will be no children for leaf nodes + if (isLeaf()) { + return null; + } + + // Specialize star node for performance + if (isStar) { + return handleStarNode(); + } + + return binarySearchChild(dimensionValue); + } + + private FixedLengthStarTreeNode handleStarNode() throws IOException { + FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId); + if (firstNode.getDimensionValue() == ALL) { + return firstNode; + } else { + return null; + } + } 
+ + private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException { + // Binary search to find child node + int low = firstChildId; + int high = getInt(LAST_CHILD_ID_OFFSET); + + while (low <= high) { + int mid = low + (high - low) / 2; + FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid); + long midNodeDimensionValue = midNode.getDimensionValue(); + + if (midNodeDimensionValue == dimensionValue) { + return midNode; + } else if (midNodeDimensionValue < dimensionValue) { + low = mid + 1; + } else { + high = mid - 1; + } + } + return null; + } + + @Override + public Iterator getChildrenIterator() throws IOException { + return new Iterator<>() { + private int currentChildId = firstChildId; + private final int lastChildId = getInt(LAST_CHILD_ID_OFFSET); + + @Override + public boolean hasNext() { + return currentChildId <= lastChildId; + } + + @Override + public FixedLengthStarTreeNode next() { + try { + return new FixedLengthStarTreeNode(in, currentChildId++); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java new file mode 100644 index 0000000000000..20f7dcf184391 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/InMemoryTreeNode.java @@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Map; + +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; + +/** + * /** + * Represents a node in a tree data structure, specifically designed for a star-tree implementation. + * A star-tree node will represent both star and non-star nodes. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class InMemoryTreeNode { + + /** + * The dimension id for the dimension (field) associated with this star-tree node. + */ + public int dimensionId = ALL; + + /** + * The starting document id (inclusive) associated with this star-tree node. + */ + public int startDocId = ALL; + + /** + * The ending document id (exclusive) associated with this star-tree node. + */ + public int endDocId = ALL; + + /** + * The aggregated document id associated with this star-tree node. + */ + public int aggregatedDocId = ALL; + + /** + * The child dimension identifier associated with this star-tree node. + */ + public int childDimensionId = ALL; + + /** + * The value of the dimension associated with this star-tree node. + */ + public long dimensionValue = ALL; + + /** + * A byte indicating whether the node is star node, null node or default node (with dimension value present). + */ + public byte nodeType = 0; + + /** + * A map containing the child nodes of this star-tree node, keyed by their dimension id. 
+ */ + public Map children; + + public long getDimensionValue() { + return dimensionValue; + } + + public byte getNodeType() { + return nodeType; + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java new file mode 100644 index 0000000000000..4ed3c3ec9febe --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.data.StarTreeDataWriter; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; + +import java.io.IOException; + +import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; + +/** + * Off heap implementation of the star-tree. 
+ * + * @opensearch.experimental + */ +public class StarTree { + private static final Logger logger = LogManager.getLogger(StarTree.class); + private final FixedLengthStarTreeNode root; + private final Integer numNodes; + + public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { + long magicMarker = data.readLong(); + if (COMPOSITE_FIELD_MARKER != magicMarker) { + logger.error("Invalid magic marker"); + throw new IOException("Invalid magic marker"); + } + int version = data.readInt(); + if (VERSION_CURRENT != version) { + logger.error("Invalid star tree version"); + throw new IOException("Invalid version"); + } + numNodes = data.readInt(); // num nodes + + RandomAccessInput in = data.randomAccessSlice( + StarTreeDataWriter.computeStarTreeDataHeaderByteSize(), + starTreeMetadata.getDataLength() - StarTreeDataWriter.computeStarTreeDataHeaderByteSize() + ); + root = new FixedLengthStarTreeNode(in, 0); + } + + public StarTreeNode getRoot() { + return root; + } + + /** + * Returns the number of nodes in star-tree + * + * @return number of nodes in te star-tree + */ + public Integer getNumNodes() { + return numNodes; + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java index 59522ffa4be89..dd9d301096f44 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java @@ -20,7 +20,6 @@ */ @ExperimentalApi public interface StarTreeNode { - long ALL = -1l; /** * Returns the dimension ID of the current star-tree node. @@ -86,12 +85,20 @@ public interface StarTreeNode { boolean isLeaf(); /** - * Checks if the current node is a star node. + * Determines the type of the current node in the Star Tree index structure. 
 * - * @return true if the node is a star node, false otherwise - * @throws IOException if an I/O error occurs while reading the star node status + * <p> + * The node type can be one of the following: + * <ul> + * <li>Star Node: Represented by the value -2. + * <li>Null Node: Represented by the value -1. + * <li>Default Node: Represented by the value 0. + * </ul>
+ * @see StarTreeNodeType + * + * @return The type of the current node, represented by the corresponding integer value (-2, -1, or 0). + * @throws IOException if an I/O error occurs while reading the node type */ - boolean isStarNode() throws IOException; + byte getStarTreeNodeType() throws IOException; /** * Returns the child star-tree node for the given dimension value. @@ -100,7 +107,7 @@ public interface StarTreeNode { * @return the child node for the given dimension value or null if child is not present * @throws IOException if an I/O error occurs while retrieving the child node */ - StarTreeNode getChildForDimensionValue(long dimensionValue) throws IOException; + StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isStar) throws IOException; /** * Returns an iterator over the children of the current star-tree node. diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java new file mode 100644 index 0000000000000..2dcec37322778 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.compositeindex.datacube.startree.node; + +/** + * Represents the different types of nodes in a StarTree data structure. + * + *

/**
 * Enumerates the kinds of nodes that can appear in a star-tree.
 *
 * <p>
 * Each kind is persisted as a single byte; this enum maps that byte back to a named constant.
 *
 * <p>
 * Star and null nodes are the special cases, default is the general case. The special cases carry
 * negative values so that they sort before default nodes, which are ordered by their dimension values.
 *
 * <p>
 * The node type can be one of the following:
 * <ul>
 * <li>Star Node: Represented by the value -2. A star node is a special node that represents
 * all possible values for a dimension.</li>
 * <li>Null Node: Represented by the value -1. A null node indicates the absence of any value
 * for a dimension.</li>
 * <li>Default Node: Represented by the value 0. A default node represents a node with an
 * actual dimension value.</li>
 * </ul>
 *
 * Unless stated otherwise a node is treated as a default node.
 *
 * @opensearch.experimental
 * @see StarTreeNode
 */
public enum StarTreeNodeType {

    /**
     * Node standing for all values of a dimension.
     */
    STAR("star", (byte) -2),

    /**
     * Node standing for a missing dimension value.
     */
    NULL("null", (byte) -1),

    /**
     * Node carrying an actual dimension value.
     */
    DEFAULT("default", (byte) 0);

    private final String name;
    private final byte value;

    /**
     * Binds a constant to its display name and serialized byte.
     *
     * @param name  the name of the node type
     * @param value the value associated with the node type
     */
    StarTreeNodeType(String name, byte value) {
        this.name = name;
        this.value = value;
    }

    /**
     * Display name of this node type.
     *
     * @return the name of the node type
     */
    public String getName() {
        return this.name;
    }

    /**
     * Serialized byte of this node type.
     *
     * @return the value associated with the node type
     */
    public byte getValue() {
        return this.value;
    }

    /**
     * Looks up the constant whose serialized byte equals {@code value}.
     *
     * @param value the value of the enum constant to return
     * @return the matching constant, or {@code null} when no constant uses that value
     */
    public static StarTreeNodeType fromValue(byte value) {
        final StarTreeNodeType[] candidates = values();
        for (int i = 0; i < candidates.length; i++) {
            final StarTreeNodeType candidate = candidates[i];
            if (candidate.value == value) {
                return candidate;
            }
        }
        return null;
    }
}
+ * + * @param starTreeFieldName star-tree field name + * @param dimensionName name of the dimension + * @return full field name for the dimension in the star-tree index + */ + public static String fullyQualifiedFieldNameForStarTreeDimensionsDocValues(String starTreeFieldName, String dimensionName) { + return starTreeFieldName + "_" + dimensionName + "_" + DIMENSION_SUFFIX; + } + + /** + * Returns the full field name for a metric in the star-tree index. + * + * @param starTreeFieldName star-tree field name + * @param fieldName name of the metric field + * @param metricName name of the metric + * @return full field name for the metric in the star-tree index + */ + public static String fullyQualifiedFieldNameForStarTreeMetricsDocValues(String starTreeFieldName, String fieldName, String metricName) { + return MetricAggregatorInfo.toFieldName(starTreeFieldName, fieldName, metricName) + "_" + METRIC_SUFFIX; + } + + /** + * Get field infos from field names + * + * @param fields field names + * @return field infos + */ + public static FieldInfo[] getFieldInfoList(List fields) { + FieldInfo[] fieldInfoList = new FieldInfo[fields.size()]; + + // field number is not really used. 
We depend on unique field names to get the desired iterator + int fieldNumber = 0; + + for (String fieldName : fields) { + fieldInfoList[fieldNumber] = getFieldInfo(fieldName, fieldNumber); + fieldNumber++; + } + return fieldInfoList; + } + + /** + * Get new field info instance for a given field name and field number + * @param fieldName name of the field + * @param fieldNumber number of the field + * @return new field info instance + */ + public static FieldInfo getFieldInfo(String fieldName, int fieldNumber) { + return new FieldInfo( + fieldName, + fieldNumber, + false, + false, + true, + IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, + DocValuesType.SORTED_NUMERIC, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + 0, + VectorEncoding.FLOAT32, + VectorSimilarityFunction.EUCLIDEAN, + false, + false + ); + } + +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java new file mode 100644 index 0000000000000..1d513d9e53d44 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -0,0 +1,210 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.index.compositeindex.datacube.startree.fileformats.data;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter;
+import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
+import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode;
+import org.opensearch.index.compositeindex.datacube.startree.node.StarTree;
+import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode;
+import org.opensearch.test.OpenSearchTestCase;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Queue;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Round-trip test for the star-tree data file: an in-memory tree is written with
+ * {@link StarTreeWriter#writeStarTree}, read back through {@link StarTree}, and every node that is
+ * read is compared field by field with the {@link InMemoryTreeNode} it was written from.
+ */
+public class StarTreeFileFormatsTests extends OpenSearchTestCase {
+
+    private IndexOutput dataOut;
+    private IndexInput dataIn;
+    private Directory directory;
+
+    /**
+     * Creates a fresh file-system directory under a temporary folder for each test.
+     *
+     * @throws IOException if the directory cannot be created
+     */
+    @Before
+    public void setup() throws IOException {
+        directory = newFSDirectory(createTempDir());
+    }
+
+    /**
+     * Writes the seven-node sample tree, checks the number of bytes written, then walks the
+     * serialized tree breadth-first and verifies each node, both directly and through
+     * {@code getChildForDimensionValue} on its parent.
+     *
+     * @throws IOException if writing or reading the data file fails
+     */
+    public void test_StarTreeNode() throws IOException {
+
+        dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT);
+        Map<Long, InMemoryTreeNode> levelOrderStarTreeNodeMap = new LinkedHashMap<>();
+        InMemoryTreeNode root = generateSampleTree(levelOrderStarTreeNodeMap);
+        long starTreeDataLength = StarTreeWriter.writeStarTree(dataOut, root, 7, "star-tree");
+
+        // asserting on the actual length of the star tree data file
+        assertEquals(247, starTreeDataLength);
+        dataOut.close();
+
+        dataIn = directory.openInput("star-tree-data", IOContext.READONCE);
+
+        // only the data offset and length are read from the metadata here, so a mock is sufficient
+        StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class);
+        when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength);
+        when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L);
+        StarTree starTree = new StarTree(dataIn, starTreeMetadata);
+
+        StarTreeNode starTreeNode = starTree.getRoot();
+        Queue<StarTreeNode> queue = new ArrayDeque<>();
+        queue.add(starTreeNode);
+
+        while ((starTreeNode = queue.poll()) != null) {
+
+            // verify the star node
+            assertStarTreeNode(starTreeNode, levelOrderStarTreeNodeMap.get(starTreeNode.getDimensionValue()));
+
+            Iterator<? extends StarTreeNode> childrenIterator = starTreeNode.getChildrenIterator();
+
+            if (starTreeNode.getChildDimensionId() != -1) {
+                while (childrenIterator.hasNext()) {
+                    StarTreeNode child = childrenIterator.next();
+                    // the child found by dimension-value lookup must match the one the iterator returned
+                    assertStarTreeNode(
+                        starTreeNode.getChildForDimensionValue(child.getDimensionValue(), false),
+                        levelOrderStarTreeNodeMap.get(child.getDimensionValue())
+                    );
+                    queue.add(child);
+                }
+            }
+        }
+
+        dataIn.close();
+
+    }
+
+    /**
+     * Asserts that a node read from the data file carries the same values as the in-memory node it
+     * was serialized from. The in-memory node is the expected side of every comparison.
+     *
+     * @param starTreeNode node read back from the star-tree data file
+     * @param treeNode     in-memory node registered under the same dimension value
+     * @throws IOException if a field of {@code starTreeNode} cannot be read
+     */
+    private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode treeNode) throws IOException {
+        assertNotNull("no in-memory node registered for dimension value [" + starTreeNode.getDimensionValue() + "]", treeNode);
+        assertEquals(treeNode.dimensionId, starTreeNode.getDimensionId());
+        assertEquals(treeNode.dimensionValue, starTreeNode.getDimensionValue());
+        assertEquals(treeNode.startDocId, starTreeNode.getStartDocId());
+        assertEquals(treeNode.endDocId, starTreeNode.getEndDocId());
+        assertEquals(treeNode.childDimensionId, starTreeNode.getChildDimensionId());
+        assertEquals(treeNode.aggregatedDocId, starTreeNode.getAggregatedDocId());
+
+        if (starTreeNode.getChildDimensionId() != -1) {
+            assertFalse(starTreeNode.isLeaf());
+            if (treeNode.children != null) {
+                assertEquals(treeNode.children.values().size(), starTreeNode.getNumChildren());
+            }
+        } else {
+            assertTrue(starTreeNode.isLeaf());
+        }
+
+    }
+
+    /**
+     * Builds a three-level sample tree: a root, two children on dimension 1 and two leaves under
+     * each of those on dimension 2. Every node is also recorded in {@code levelOrderStarTreeNode},
+     * keyed by its dimension value, in level order.
+     *
+     * @param levelOrderStarTreeNode map that receives every created node, keyed by dimension value
+     * @return the root of the sample tree
+     */
+    private InMemoryTreeNode generateSampleTree(Map<Long, InMemoryTreeNode> levelOrderStarTreeNode) {
+        // Create the root node (its dimension value is left at the InMemoryTreeNode default)
+        InMemoryTreeNode root = newNode(0, 0, 100, 1);
+
+        levelOrderStarTreeNode.put(root.dimensionValue, root);
+
+        // Create child nodes for dimension 1
+        InMemoryTreeNode dim1Node1 = newNode(1, 0, 50, 2);
+        dim1Node1.dimensionValue = 1;
+
+        InMemoryTreeNode dim1Node2 = newNode(1, 50, 100, 2);
+        dim1Node2.dimensionValue = 2;
+
+        root.children.put(1L, dim1Node1);
+        root.children.put(2L, dim1Node2);
+
+        levelOrderStarTreeNode.put(dim1Node1.dimensionValue, dim1Node1);
+        levelOrderStarTreeNode.put(dim1Node2.dimensionValue, dim1Node2);
+
+        // Create child nodes for dimension 2
+        InMemoryTreeNode dim2Node1 = newNode(2, 0, 25, -1);
+        dim2Node1.dimensionValue = 3;
+
+        InMemoryTreeNode dim2Node2 = newNode(2, 25, 50, -1);
+        dim2Node2.dimensionValue = 4;
+
+        InMemoryTreeNode dim2Node3 = newNode(2, 50, 75, -1);
+        dim2Node3.dimensionValue = 5;
+
+        InMemoryTreeNode dim2Node4 = newNode(2, 75, 100, -1);
+        dim2Node4.dimensionValue = 6;
+
+        dim1Node1.children.put(3L, dim2Node1);
+        dim1Node1.children.put(4L, dim2Node2);
+        dim1Node2.children.put(5L, dim2Node3);
+        dim1Node2.children.put(6L, dim2Node4);
+
+        levelOrderStarTreeNode.put(dim2Node1.dimensionValue, dim2Node1);
+        levelOrderStarTreeNode.put(dim2Node2.dimensionValue, dim2Node2);
+        levelOrderStarTreeNode.put(dim2Node3.dimensionValue, dim2Node3);
+        levelOrderStarTreeNode.put(dim2Node4.dimensionValue, dim2Node4);
+
+        return root;
+    }
+
+    /**
+     * Creates one sample node. The node type is set on the node being built; the earlier
+     * copy-pasted code assigned it to the root every time. A child dimension id of -1 marks a
+     * leaf, which gets no children map.
+     *
+     * @param dimensionId      dimension id of the node
+     * @param startDocId       first document id covered by the node
+     * @param endDocId         end document id covered by the node
+     * @param childDimensionId dimension id of the children, or -1 for a leaf
+     * @return the populated node, with a random aggregated doc id
+     */
+    private static InMemoryTreeNode newNode(int dimensionId, int startDocId, int endDocId, int childDimensionId) {
+        InMemoryTreeNode node = new InMemoryTreeNode();
+        node.dimensionId = dimensionId;
+        node.startDocId = startDocId;
+        node.endDocId = endDocId;
+        node.childDimensionId = childDimensionId;
+        node.aggregatedDocId = randomInt();
+        node.nodeType = (byte) 0;
+        if (childDimensionId == -1) {
+            node.children = null;
+        } else {
+            node.children = new HashMap<>();
+        }
+        return node;
+    }
+
+    /**
+     * Closes the data input, data output and directory. The streams are skipped when they were
+     * never opened, so a test that fails early is not masked by a NullPointerException here.
+     *
+     * @throws Exception if the parent tear-down or closing a resource fails
+     */
+    @Override
+    public void tearDown() throws Exception {
+        super.tearDown();
+        if (dataIn != null) {
+            dataIn.close();
+        }
+        if (dataOut != null) {
+            dataOut.close();
+        }
+        if (directory != null) {
+            directory.close();
+        }
+    }
+
+}
diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java
new file mode 100644
index 0000000000000..a67701847bf92
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java
@@ -0,0 +1,209 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta;
+
+import org.apache.lucene.codecs.lucene99.Lucene99Codec;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.VectorEncoding;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.Version;
+import org.opensearch.index.compositeindex.datacube.Dimension;
+import org.opensearch.index.compositeindex.datacube.Metric;
+import org.opensearch.index.compositeindex.datacube.MetricStat;
+import org.opensearch.index.compositeindex.datacube.NumericDimension;
+import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
+import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
+import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo;
+import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter;
+import org.opensearch.index.fielddata.IndexNumericFieldData;
+import org.opensearch.index.mapper.CompositeMappedFieldType;
+import org.opensearch.test.OpenSearchTestCase;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.UUID;
+
+import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER;
+import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT;
+import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE;
+
+/**
+ * Round-trip test for star-tree metadata: the metadata is written with
+ * {@link StarTreeWriter#writeStarTreeMetadata}, read back through {@link StarTreeMetadata}, and
+ * every value that is read is compared with the value that was written.
+ */
+public class StarTreeMetaTests extends OpenSearchTestCase {
+
+    private IndexOutput metaOut;
+    private IndexInput metaIn;
+    private StarTreeField starTreeField;
+    private SegmentWriteState writeState;
+    private Directory directory;
+    private FieldInfo[] fieldsInfo;
+    private List<Dimension> dimensionsOrder;
+    private List<String> fields = List.of();
+    private List<Metric> metrics;
+    private List<MetricAggregatorInfo> metricAggregatorInfos = new ArrayList<>();
+    private int segmentDocumentCount;
+    private long dataFilePointer;
+    private long dataFileLength;
+
+    /**
+     * Creates a temporary directory and builds a segment write state with ten sorted-numeric
+     * doc-values fields named {@code field1} to {@code field10}.
+     *
+     * @throws IOException if the directory cannot be created
+     */
+    @Before
+    public void setup() throws IOException {
+        fields = List.of("field1", "field2", "field3", "field4", "field5", "field6", "field7", "field8", "field9", "field10");
+        directory = newFSDirectory(createTempDir());
+        SegmentInfo segmentInfo = new SegmentInfo(
+            directory,
+            Version.LATEST,
+            Version.LUCENE_9_11_0,
+            "test_segment",
+            6,
+            false,
+            false,
+            new Lucene99Codec(),
+            new HashMap<>(),
+            UUID.randomUUID().toString().substring(0, 16).getBytes(StandardCharsets.UTF_8),
+            new HashMap<>(),
+            null
+        );
+
+        fieldsInfo = new FieldInfo[fields.size()];
+        for (int i = 0; i < fieldsInfo.length; i++) {
+            fieldsInfo[i] = new FieldInfo(
+                fields.get(i),
+                i,
+                false,
+                false,
+                true,
+                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
+                DocValuesType.SORTED_NUMERIC,
+                -1,
+                Collections.emptyMap(),
+                0,
+                0,
+                0,
+                0,
+                VectorEncoding.FLOAT32,
+                VectorSimilarityFunction.EUCLIDEAN,
+                false,
+                false
+            );
+        }
+        FieldInfos fieldInfos = new FieldInfos(fieldsInfo);
+        writeState = new SegmentWriteState(InfoStream.getDefault(), segmentInfo.dir, segmentInfo, fieldInfos, null, newIOContext(random()));
+    }
+
+    /**
+     * Writes metadata for a star-tree field with four dimensions and three metrics, using random
+     * values for the doc count, max leaf docs, data pointer and data length. It then reads the
+     * file back and checks the header marker, the version and every metadata value.
+     *
+     * @throws IOException if writing or reading the metadata file fails
+     */
+    public void test_starTreeMetadata() throws IOException {
+        dimensionsOrder = List.of(
+            new NumericDimension("field1"),
+            new NumericDimension("field3"),
+            new NumericDimension("field5"),
+            new NumericDimension("field8")
+        );
+        metrics = List.of(
+            new Metric("field2", List.of(MetricStat.SUM)),
+            new Metric("field4", List.of(MetricStat.SUM)),
+            new Metric("field6", List.of(MetricStat.COUNT))
+        );
+        int maxLeafDocs = randomNonNegativeInt();
+        StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration(
+            maxLeafDocs,
+            new HashSet<>(),
+            StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP
+        );
+        starTreeField = new StarTreeField("star_tree", dimensionsOrder, metrics, starTreeFieldConfiguration);
+
+        // one aggregator info per (metric field, metric stat) pair, in declaration order
+        for (Metric metric : metrics) {
+            for (MetricStat metricType : metric.getMetrics()) {
+                MetricAggregatorInfo metricAggregatorInfo = new MetricAggregatorInfo(
+                    metricType,
+                    metric.getField(),
+                    starTreeField.getName(),
+                    IndexNumericFieldData.NumericType.DOUBLE
+                );
+                metricAggregatorInfos.add(metricAggregatorInfo);
+            }
+        }
+
+        dataFileLength = randomNonNegativeLong();
+        dataFilePointer = randomNonNegativeLong();
+        segmentDocumentCount = randomNonNegativeInt();
+        metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT);
+        StarTreeWriter.writeStarTreeMetadata(
+            metaOut,
+            starTreeField,
+            metricAggregatorInfos,
+            segmentDocumentCount,
+            dataFilePointer,
+            dataFileLength
+        );
+        metaOut.close();
+
+        // reading and asserting the metadata
+        metaIn = directory.openInput("star-tree-metadata", IOContext.READONCE);
+        assertEquals(COMPOSITE_FIELD_MARKER, metaIn.readLong());
+        assertEquals(VERSION_CURRENT, metaIn.readVInt());
+
+        String compositeFieldName = metaIn.readString();
+        CompositeMappedFieldType.CompositeFieldType compositeFieldType = CompositeMappedFieldType.CompositeFieldType.fromName(
+            metaIn.readString()
+        );
+
+        StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType);
+        // checked before first use, so a null would fail here rather than as a NullPointerException below
+        assertNotNull(starTreeMetadata);
+        assertEquals(starTreeField.getName(), starTreeMetadata.getCompositeFieldName());
+        assertEquals(STAR_TREE, starTreeMetadata.getCompositeFieldType());
+
+        for (int i = 0; i < dimensionsOrder.size(); i++) {
+            assertEquals(dimensionsOrder.get(i).getField(), starTreeMetadata.getDimensionFields().get(i));
+        }
+
+        for (int i = 0; i < metricAggregatorInfos.size(); i++) {
+            MetricEntry metricEntry = starTreeMetadata.getMetricEntries().get(i);
+            assertEquals(metricAggregatorInfos.get(i).getField(), metricEntry.getMetricFieldName());
+            assertEquals(metricAggregatorInfos.get(i).getMetricStat(), metricEntry.getMetricStat());
+        }
+        // NOTE(review): the zero delta looks deliberate, to select a primitive assertEquals overload; the getter's return type is not visible here
+        assertEquals(segmentDocumentCount, starTreeMetadata.getSegmentAggregatedDocCount(), 0);
+        assertEquals(maxLeafDocs, starTreeMetadata.getMaxLeafDocs(), 0);
+        assertEquals(
+            starTreeFieldConfiguration.getSkipStarNodeCreationInDims().size(),
+            starTreeMetadata.getSkipStarNodeCreationInDims().size()
+        );
+        for (String skipStarNodeCreationInDims : starTreeField.getStarTreeConfig().getSkipStarNodeCreationInDims()) {
+            assertTrue(starTreeMetadata.getSkipStarNodeCreationInDims().contains(skipStarNodeCreationInDims));
+        }
+        assertEquals(starTreeFieldConfiguration.getBuildMode(), starTreeMetadata.getStarTreeBuildMode());
+        assertEquals(dataFileLength, starTreeMetadata.getDataLength());
+        assertEquals(dataFilePointer, starTreeMetadata.getDataStartFilePointer());
+
+        metaIn.close();
+
+    }
+
+    /**
+     * Closes the metadata output, metadata input and directory. Resources that were never opened
+     * are skipped, so a test that fails early is not masked by a NullPointerException here.
+     *
+     * @throws Exception if the parent tear-down or closing a resource fails
+     */
+    @Override
+    public void tearDown() throws Exception {
+        super.tearDown();
+        if (metaOut != null) {
+            metaOut.close();
+        }
+        if (metaIn != null) {
+            metaIn.close();
+        }
+        if (directory != null) {
+            directory.close();
+        }
+    }
+
+}
diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java
index 6afc7c23d9e66..1b740476f89c3 100644
--- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java
@@ -807,6 +807,14 @@ public static int randomInt() {
         return random().nextInt();
     }
 
+    /**
+     * @return an int between 0 and Integer.MAX_VALUE (inclusive) chosen uniformly at random.
+     */
+    public static int randomNonNegativeInt() {
+        final int candidate = randomInt(); // Math.abs(Integer.MIN_VALUE) is still negative, so that one value maps to 0
+        return candidate != Integer.MIN_VALUE ? Math.abs(candidate) : 0;
+    }
+
     /**
      * @return a long between 0 and Long.MAX_VALUE (inclusive) chosen uniformly at random.
      */

From f84fc392e9fff9f3d55b8139930773a7bda905bc Mon Sep 17 00:00:00 2001
From: Sarthak Aggarwal
Date: Thu, 22 Aug 2024 17:12:22 +0530
Subject: [PATCH 02/11] addressing nits

Signed-off-by: Sarthak Aggarwal
---
 .../CompositeIndexConstants.java | 2 +-
 .../CompositeIndexMetadata.java | 2 +-
 .../fileformats/meta/StarTreeMetadata.java | 67 ++++++++++---
 .../node/FixedLengthStarTreeNode.java | 93 ++++++++++++++++++-
 .../startree/node/StarTreeNodeType.java | 2 +-
 .../fileformats/meta/StarTreeMetaTests.java | 2 +-
 6 files changed, 150 insertions(+), 18 deletions(-)

diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java
index ffa3dee3578e5..9402675ff39d9 100644
--- a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java
+++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexConstants.java
@@ -19,7 +19,7 @@ public class CompositeIndexConstants {
     public static final long COMPOSITE_FIELD_MARKER = 0xC0950513F1E1DL; // Composite Field
 
     /**
-     * Represents the key to fetch number of documents in a segment.
+     * Represents the key to fetch number of non-star aggregated segment documents.
*/ public static final String SEGMENT_DOCS_COUNT = "segmentDocsCount"; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java index 6ba401afe0e6f..4972c877d4ab8 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/CompositeIndexMetadata.java @@ -13,7 +13,7 @@ /** * This class represents the metadata of a Composite Index, which includes information about * the composite field name, type, and the specific metadata for the type of composite field - * (e.g., Tree metadata). + * (e.g., Star Tree metadata). * * @opensearch.experimental */ diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java index b1359d4ce8d57..a8a5e3c1b03b1 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -29,22 +29,67 @@ */ public class StarTreeMetadata extends CompositeIndexMetadata { private static final Logger logger = LogManager.getLogger(StarTreeMetadata.class); + + /** + * The index input for reading metadata from the segment file. + */ private final IndexInput meta; + + /** + * The name of the star-tree field, used to identify the star-tree. + */ private final String starTreeFieldName; + + /** + * The type of the star-tree field, indicating the specific implementation or version. + * Here, STAR_TREE field. + */ private final String starTreeFieldType; + + /** + * List of dimension fields used in the star-tree. 
+ */ private final List dimensionFields; + + /** + * List of metric entries, containing field names and associated metric statistic. + */ private final List metricEntries; + + /** + * The total number of documents aggregated in this star-tree segment. + */ private final Integer segmentAggregatedDocCount; + + /** + * The maximum number of documents allowed in a leaf node. + */ private final Integer maxLeafDocs; + + /** + * Set of dimensions for which star node creation should be skipped. + */ private final Set skipStarNodeCreationInDims; + + /** + * The build mode used for constructing the star-tree. + */ private final StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode; + + /** + * The file pointer to the start of the associated star-tree data in the (.cid) file + */ private final long dataStartFilePointer; + + /** + * The length of the star-tree data in bytes, used for reading the correct amount of data from (.cid) file + */ private final long dataLength; /** * A star tree metadata constructor to initialize star tree metadata from the segment file (.cim) using index input. * - * @param metaIn an index input to read star-tree meta + * @param metaIn an index input to read star-tree meta * @param compositeFieldName name of the composite field. Here, name of the star-tree field. * @param compositeFieldType type of the composite field. Here, STAR_TREE field. * @throws IOException if unable to read star-tree metadata from the file @@ -74,17 +119,17 @@ public StarTreeMetadata(IndexInput metaIn, String compositeFieldName, CompositeM * A star tree metadata constructor to initialize star tree metadata. * Used for testing. * - * @param meta an index input to read star-tree meta - * @param compositeFieldName name of the composite field. Here, name of the star-tree field. - * @param compositeFieldType type of the composite field. Here, STAR_TREE field. 
- * @param dimensionFields list of dimension fields - * @param metricEntries list of metric entries - * @param segmentAggregatedDocCount segment aggregated doc count - * @param maxLeafDocs max leaf docs + * @param meta an index input to read star-tree meta + * @param compositeFieldName name of the composite field. Here, name of the star-tree field. + * @param compositeFieldType type of the composite field. Here, STAR_TREE field. + * @param dimensionFields list of dimension fields + * @param metricEntries list of metric entries + * @param segmentAggregatedDocCount segment aggregated doc count + * @param maxLeafDocs max leaf docs * @param skipStarNodeCreationInDims set of dimensions to skip star node creation - * @param starTreeBuildMode star tree build mode - * @param dataStartFilePointer data start file pointer - * @param dataLength data length + * @param starTreeBuildMode star tree build mode + * @param dataStartFilePointer star file pointer to the associated star tree data in (.cid) file + * @param dataLength length of the corresponding star-tree data in (.cid) file */ public StarTreeMetadata( String compositeFieldName, diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java index 8159d2039121c..098befa3e7172 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -10,21 +10,55 @@ import org.apache.lucene.store.RandomAccessInput; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Iterator; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; /** - * Fixed Length implementation of {@link StarTreeNode} + * Fixed Length implementation of 
{@link StarTreeNode}. + * <p>
+ * This class represents a node in a star tree with a fixed-length serialization format. + * It provides efficient storage and retrieval of node information using a RandomAccessInput. + * The node structure includes the methods to access all the constructs of InMemoryTreeNode. + * + * <p>
+ * Key features: + * - Fixed-size serialization for each node, allowing for efficient random access + * - Binary search capability for finding child nodes + * - Support for star nodes, null nodes and other default nodes + * - Iteration over child nodes + * <p>
+ * + * The class uses specific byte offsets for each field in the serialized format, + * enabling direct access to node properties without parsing the entire node structure. * * @opensearch.experimental */ public class FixedLengthStarTreeNode implements StarTreeNode { + + /** + * Number of integer fields in the serializable data + */ public static final int NUM_INT_SERIALIZABLE_FIELDS = 6; + + /** + * Number of long fields in the serializable data + */ public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1; + + /** + * Number of byte fields in the serializable data + */ public static final int NUM_BYTE_SERIALIZABLE_FIELDS = 1; + + /** + * Total size in bytes of the serializable data for each node + */ public static final long SERIALIZABLE_DATA_SIZE_IN_BYTES = (Integer.BYTES * NUM_INT_SERIALIZABLE_FIELDS) + (Long.BYTES * NUM_LONG_SERIALIZABLE_FIELDS) + (NUM_BYTE_SERIALIZABLE_FIELDS * Byte.BYTES); + + // Byte offsets for each field in the serialized data private static final int DIMENSION_ID_OFFSET = 0; private static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES; private static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; @@ -34,27 +68,68 @@ public class FixedLengthStarTreeNode implements StarTreeNode { private static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Byte.BYTES; private static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; + /** + * Constant representing an invalid node ID + */ public static final int INVALID_ID = -1; + /** + * The ID of this node + */ private final int nodeId; + + /** + * The ID of the first child of this node + */ private final int firstChildId; + /** + * The input source for reading node data + */ RandomAccessInput in; + /** + * Constructs a FixedLengthStarTreeNode. 
+ * + * @param in The RandomAccessInput to read node data from + * @param nodeId The ID of this node + * @throws IOException If there's an error reading from the input + */ public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOException { this.in = in; this.nodeId = nodeId; firstChildId = getInt(FIRST_CHILD_ID_OFFSET); } + /** + * Reads an integer value from the specified offset in the node's data. + * + * @param fieldOffset The offset of the field to read + * @return The integer value at the specified offset + * @throws IOException If there's an error reading from the input + */ private int getInt(int fieldOffset) throws IOException { return in.readInt(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); } + /** + * Reads a long value from the specified offset in the node's data. + * + * @param fieldOffset The offset of the field to read + * @return The long value at the specified offset + * @throws IOException If there's an error reading from the input + */ private long getLong(int fieldOffset) throws IOException { return in.readLong(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); } + /** + * Reads a byte value from the specified offset in the node's data. + * + * @param fieldOffset The offset of the field to read + * @return The byte value at the specified offset + * @throws IOException If there's an error reading from the input + */ private byte getByte(int fieldOffset) throws IOException { return in.readByte(nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES + fieldOffset); } @@ -127,6 +202,12 @@ public StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isSta return binarySearchChild(dimensionValue); } + /** + * Handles the special case of a star node. 
+ * + * @return The star node if found, null otherwise + * @throws IOException If there's an error reading from the input + */ private FixedLengthStarTreeNode handleStarNode() throws IOException { FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId); if (firstNode.getDimensionValue() == ALL) { @@ -136,8 +217,14 @@ private FixedLengthStarTreeNode handleStarNode() throws IOException { } } + /** + * Performs a binary search to find a child node with the given dimension value. + * + * @param dimensionValue The dimension value to search for + * @return The child node if found, null otherwise + * @throws IOException If there's an error reading from the input + */ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException { - // Binary search to find child node int low = firstChildId; int high = getInt(LAST_CHILD_ID_OFFSET); @@ -173,7 +260,7 @@ public FixedLengthStarTreeNode next() { try { return new FixedLengthStarTreeNode(in, currentChildId++); } catch (IOException e) { - throw new RuntimeException(e); + throw new UncheckedIOException(e); } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java index 2dcec37322778..7eb2ccd8bcd0e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java @@ -98,6 +98,6 @@ public static StarTreeNodeType fromValue(byte value) { return nodeType; } } - return null; + throw new IllegalStateException("Unrecognized value byte to determine star-tree node type: [" + value + "]"); } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java 
b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java index a67701847bf92..2df661aff8195 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java @@ -120,7 +120,7 @@ public void test_starTreeMetadata() throws IOException { metrics = List.of( new Metric("field2", List.of(MetricStat.SUM)), new Metric("field4", List.of(MetricStat.SUM)), - new Metric("field6", List.of(MetricStat.COUNT)) + new Metric("field6", List.of(MetricStat.VALUE_COUNT)) ); int maxLeafDocs = randomNonNegativeInt(); StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( From 99d961bfcf35caedbc485ee2164bc41dff9d2f5e Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Sun, 25 Aug 2024 13:31:28 +0530 Subject: [PATCH 03/11] addressing nits Signed-off-by: Sarthak Aggarwal --- .../LuceneDocValuesConsumerFactory.java | 20 +++-- .../LuceneDocValuesProducerFactory.java | 19 ++--- .../startree/fileformats/StarTreeWriter.java | 6 +- .../node/FixedLengthStarTreeNode.java | 5 +- .../data/StarTreeFileFormatsTests.java | 3 +- .../fileformats/meta/StarTreeMetaTests.java | 3 +- .../startree/utils/StarTreeUtilsTest.java | 79 +++++++++++++++++++ 7 files changed, 106 insertions(+), 29 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTest.java diff --git a/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java b/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java index 1ed672870337e..4b3f62b6171da 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java +++ 
b/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesConsumerFactory.java @@ -34,17 +34,15 @@ public static DocValuesConsumer getDocValuesConsumerForCompositeCodec( String metaCodec, String metaExtension ) throws IOException { - try ( - Lucene90DocValuesConsumerWrapper lucene90DocValuesConsumerWrapper = new Lucene90DocValuesConsumerWrapper( - state, - dataCodec, - dataExtension, - metaCodec, - metaExtension - ) - ) { - return lucene90DocValuesConsumerWrapper.getLucene90DocValuesConsumer(); - } + Lucene90DocValuesConsumerWrapper lucene90DocValuesConsumerWrapper = new Lucene90DocValuesConsumerWrapper( + state, + dataCodec, + dataExtension, + metaCodec, + metaExtension + ); + return lucene90DocValuesConsumerWrapper.getLucene90DocValuesConsumer(); + } } diff --git a/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java b/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java index 611a97ffeb834..d85205d239648 100644 --- a/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java +++ b/server/src/main/java/org/opensearch/index/codec/composite/LuceneDocValuesProducerFactory.java @@ -40,17 +40,14 @@ public static DocValuesProducer getDocValuesProducerForCompositeCodec( switch (compositeCodec) { case Composite99Codec.COMPOSITE_INDEX_CODEC_NAME: - try ( - Lucene90DocValuesProducerWrapper lucene90DocValuesProducerWrapper = new Lucene90DocValuesProducerWrapper( - state, - dataCodec, - dataExtension, - metaCodec, - metaExtension - ) - ) { - return lucene90DocValuesProducerWrapper.getLucene90DocValuesProducer(); - } + Lucene90DocValuesProducerWrapper lucene90DocValuesProducerWrapper = new Lucene90DocValuesProducerWrapper( + state, + dataCodec, + dataExtension, + metaCodec, + metaExtension + ); + return lucene90DocValuesProducerWrapper.getLucene90DocValuesProducer(); default: throw new IllegalStateException("Invalid composite codec " + "[" + 
compositeCodec + "]"); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java index 00355fd187f49..19dcc16e7f95f 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java @@ -30,7 +30,7 @@ public class StarTreeWriter { /** Current version for the star tree writer */ public static final int VERSION_CURRENT = VERSION_START; - private StarTreeWriter() {} + public StarTreeWriter() {} /** * Write star tree to index output stream @@ -42,7 +42,7 @@ private StarTreeWriter() {} * @return total size of the three * @throws IOException when star-tree data serialization fails */ - public static long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { + public long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { return StarTreeDataWriter.writeStarTree(dataOut, rootNode, numNodes, name); } @@ -57,7 +57,7 @@ public static long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, * @param dataFileLength data file length * @throws IOException when star-tree data serialization fails */ - public static void writeStarTreeMetadata( + public void writeStarTreeMetadata( IndexOutput metaOut, StarTreeField starTreeField, List metricAggregatorInfos, diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java index 098befa3e7172..96d82070abcf8 100644 --- 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -29,7 +29,6 @@ * - Support for star nodes, null nodes and other default nodes * - Iteration over child nodes *

- * * The class uses specific byte offsets for each field in the serialized format, * enabling direct access to node properties without parsing the entire node structure. * @@ -199,7 +198,9 @@ public StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isSta return handleStarNode(); } - return binarySearchChild(dimensionValue); + StarTreeNode resultStarTreeNode = binarySearchChild(dimensionValue); + assert null != resultStarTreeNode; + return resultStarTreeNode; } /** diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java index 1d513d9e53d44..ebef961a33d5f 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -47,7 +47,8 @@ public void test_StarTreeNode() throws IOException { dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); Map levelOrderStarTreeNodeMap = new LinkedHashMap<>(); InMemoryTreeNode root = generateSampleTree(levelOrderStarTreeNodeMap); - long starTreeDataLength = StarTreeWriter.writeStarTree(dataOut, root, 7, "star-tree"); + StarTreeWriter starTreeWriter = new StarTreeWriter(); + long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, root, 7, "star-tree"); // asserting on the actual length of the star tree data file assertEquals(starTreeDataLength, 247); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java index 2df661aff8195..0ebca69e518cb 100644 --- 
a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java @@ -146,7 +146,8 @@ public void test_starTreeMetadata() throws IOException { dataFilePointer = randomNonNegativeLong(); segmentDocumentCount = randomNonNegativeInt(); metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT); - StarTreeWriter.writeStarTreeMetadata( + StarTreeWriter starTreeWriter = new StarTreeWriter(); + starTreeWriter.writeStarTreeMetadata( metaOut, starTreeField, metricAggregatorInfos, diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTest.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTest.java new file mode 100644 index 0000000000000..bf9621bda1ba7 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTest.java @@ -0,0 +1,79 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.utils; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.UUID; + +public class StarTreeUtilsTest extends OpenSearchTestCase { + + public void testFullyQualifiedFieldNameForStarTreeDimensionsDocValues() { + String starTreeFieldName = "myStarTreeField"; + String dimensionName = "dimension1"; + String expectedFieldName = "myStarTreeField_dimension1_dim"; + + String actualFieldName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeDimensionsDocValues(starTreeFieldName, dimensionName); + assertEquals(expectedFieldName, actualFieldName); + } + + public void testFullyQualifiedFieldNameForStarTreeMetricsDocValues() { + String starTreeFieldName = "myStarTreeField"; + String fieldName = "myField"; + String metricName = "metric1"; + String expectedFieldName = "myStarTreeField_myField_metric1_metric"; + + String actualFieldName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues(starTreeFieldName, fieldName, metricName); + assertEquals(expectedFieldName, actualFieldName); + } + + public void testGetFieldInfoList() { + List fieldNames = Arrays.asList("field1", "field2", "field3"); + FieldInfo[] actualFieldInfos = StarTreeUtils.getFieldInfoList(fieldNames); + for (int i = 0; i < fieldNames.size(); i++) { + assertFieldInfos(actualFieldInfos[i], fieldNames.get(i), i); + } + } + + public void testGetFieldInfo() { + String fieldName = UUID.randomUUID().toString(); + int fieldNumber = randomInt(); + assertFieldInfos(StarTreeUtils.getFieldInfo(fieldName, fieldNumber), fieldName, fieldNumber); + + } + + private void assertFieldInfos(FieldInfo actualFieldInfo, String 
fieldName, Integer fieldNumber){ + assertEquals(fieldName, actualFieldInfo.name); + assertEquals(fieldNumber, actualFieldInfo.number, 0); + assertFalse(actualFieldInfo.hasVectorValues()); + assertTrue(actualFieldInfo.hasNorms()); + assertFalse(actualFieldInfo.hasVectors()); + assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, actualFieldInfo.getIndexOptions()); + assertEquals(DocValuesType.SORTED_NUMERIC, actualFieldInfo.getDocValuesType()); + assertEquals(-1, actualFieldInfo.getDocValuesGen()); + assertEquals(Collections.emptyMap(), actualFieldInfo.attributes()); + assertEquals(0, actualFieldInfo.getPointDimensionCount()); + assertEquals(0, actualFieldInfo.getPointIndexDimensionCount()); + assertEquals(0, actualFieldInfo.getPointNumBytes()); + assertEquals(0, actualFieldInfo.getVectorDimension()); + assertEquals(VectorEncoding.FLOAT32, actualFieldInfo.getVectorEncoding()); + assertEquals(VectorSimilarityFunction.EUCLIDEAN, actualFieldInfo.getVectorSimilarityFunction()); + assertFalse(actualFieldInfo.isSoftDeletesField()); + } + + +} From a80547265bb624d0b54c07cc890400bb7a8d28ee Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Sun, 25 Aug 2024 15:51:46 +0530 Subject: [PATCH 04/11] adding tests Signed-off-by: Sarthak Aggarwal --- .../node/FixedLengthStarTreeNode.java | 16 ++- .../datacube/startree/node/StarTreeNode.java | 2 +- .../data/StarTreeFileFormatsTests.java | 127 ++++++------------ ...UtilsTest.java => StarTreeUtilsTests.java} | 5 +- 4 files changed, 53 insertions(+), 97 deletions(-) rename server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/{StarTreeUtilsTest.java => StarTreeUtilsTests.java} (97%) diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java index 96d82070abcf8..f58159535e6e8 100644 --- 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -187,7 +187,7 @@ public byte getStarTreeNodeType() throws IOException { } @Override - public StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isStar) throws IOException { + public StarTreeNode getChildForDimensionValue(Long dimensionValue, boolean isStar) throws IOException { // there will be no children for leaf nodes if (isLeaf()) { return null; @@ -197,9 +197,11 @@ public StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isSta if (isStar) { return handleStarNode(); } - - StarTreeNode resultStarTreeNode = binarySearchChild(dimensionValue); - assert null != resultStarTreeNode; + StarTreeNode resultStarTreeNode = null; + if (null != dimensionValue) { + resultStarTreeNode = binarySearchChild(dimensionValue); + assert null != resultStarTreeNode; + } return resultStarTreeNode; } @@ -232,11 +234,11 @@ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IO while (low <= high) { int mid = low + (high - low) / 2; FixedLengthStarTreeNode midNode = new FixedLengthStarTreeNode(in, mid); - long midNodeDimensionValue = midNode.getDimensionValue(); + long midDimensionValue = midNode.getDimensionValue(); - if (midNodeDimensionValue == dimensionValue) { + if (midDimensionValue == dimensionValue) { return midNode; - } else if (midNodeDimensionValue < dimensionValue) { + } else if (midDimensionValue < dimensionValue) { low = mid + 1; } else { high = mid - 1; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java index dd9d301096f44..d29d9145853ad 100644 --- 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java @@ -107,7 +107,7 @@ public interface StarTreeNode { * @return the child node for the given dimension value or null if child is not present * @throws IOException if an I/O error occurs while retrieving the child node */ - StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isStar) throws IOException; + StarTreeNode getChildForDimensionValue(Long dimensionValue, boolean isStar) throws IOException; /** * Returns an iterator over the children of the current star-tree node. diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java index ebef961a33d5f..0de2fc2966bd1 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -36,22 +36,26 @@ public class StarTreeFileFormatsTests extends OpenSearchTestCase { private IndexOutput dataOut; private IndexInput dataIn; private Directory directory; + private Integer maxLevels; + private static Integer dimensionValue; @Before public void setup() throws IOException { directory = newFSDirectory(createTempDir()); + maxLevels = randomIntBetween(2, 5); + dimensionValue = 0; } public void test_StarTreeNode() throws IOException { dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); - Map levelOrderStarTreeNodeMap = new LinkedHashMap<>(); - InMemoryTreeNode root = generateSampleTree(levelOrderStarTreeNodeMap); + Map inMemoryTreeNodeMap = new LinkedHashMap<>(); + InMemoryTreeNode root = 
generateSampleTree(inMemoryTreeNodeMap); StarTreeWriter starTreeWriter = new StarTreeWriter(); - long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, root, 7, "star-tree"); + long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, root, inMemoryTreeNodeMap.size(), "star-tree"); // asserting on the actual length of the star tree data file - assertEquals(starTreeDataLength, 247); + assertEquals(starTreeDataLength, (inMemoryTreeNodeMap.size() * 33L) + 16); dataOut.close(); dataIn = directory.openInput("star-tree-data", IOContext.READONCE); @@ -68,7 +72,7 @@ public void test_StarTreeNode() throws IOException { while ((starTreeNode = queue.poll()) != null) { // verify the star node - assertStarTreeNode(starTreeNode, levelOrderStarTreeNodeMap.get(starTreeNode.getDimensionValue())); + assertStarTreeNode(starTreeNode, inMemoryTreeNodeMap.get(starTreeNode.getDimensionValue())); Iterator childrenIterator = starTreeNode.getChildrenIterator(); @@ -77,7 +81,7 @@ public void test_StarTreeNode() throws IOException { StarTreeNode child = childrenIterator.next(); assertStarTreeNode( starTreeNode.getChildForDimensionValue(child.getDimensionValue(), false), - levelOrderStarTreeNodeMap.get(child.getDimensionValue()) + inMemoryTreeNodeMap.get(child.getDimensionValue()) ); queue.add(child); } @@ -107,98 +111,49 @@ private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode tree } - private InMemoryTreeNode generateSampleTree(Map levelOrderStarTreeNode) { + public InMemoryTreeNode generateSampleTree(Map inMemoryTreeNodeMap) { // Create the root node InMemoryTreeNode root = new InMemoryTreeNode(); root.dimensionId = 0; - root.startDocId = 0; - root.endDocId = 100; + root.startDocId = randomInt(); + root.endDocId = randomInt(); root.childDimensionId = 1; root.aggregatedDocId = randomInt(); root.nodeType = (byte) 0; root.children = new HashMap<>(); - levelOrderStarTreeNode.put(root.dimensionValue, root); + inMemoryTreeNodeMap.put(root.dimensionValue, 
root); - // Create child nodes for dimension 1 - InMemoryTreeNode dim1Node1 = new InMemoryTreeNode(); - dim1Node1.dimensionId = 1; - dim1Node1.dimensionValue = 1; - dim1Node1.startDocId = 0; - dim1Node1.endDocId = 50; - dim1Node1.childDimensionId = 2; - dim1Node1.aggregatedDocId = randomInt(); - root.nodeType = (byte) 0; - dim1Node1.children = new HashMap<>(); - - InMemoryTreeNode dim1Node2 = new InMemoryTreeNode(); - dim1Node2.dimensionId = 1; - dim1Node2.dimensionValue = 2; - dim1Node2.startDocId = 50; - dim1Node2.endDocId = 100; - dim1Node2.childDimensionId = 2; - dim1Node2.aggregatedDocId = randomInt(); - root.nodeType = (byte) 0; - dim1Node2.children = new HashMap<>(); - - root.children.put(1L, dim1Node1); - root.children.put(2L, dim1Node2); - - levelOrderStarTreeNode.put(dim1Node1.dimensionValue, dim1Node1); - levelOrderStarTreeNode.put(dim1Node2.dimensionValue, dim1Node2); - - // Create child nodes for dimension 2 - InMemoryTreeNode dim2Node1 = new InMemoryTreeNode(); - dim2Node1.dimensionId = 2; - dim2Node1.dimensionValue = 3; - dim2Node1.startDocId = 0; - dim2Node1.endDocId = 25; - dim2Node1.childDimensionId = -1; - dim2Node1.aggregatedDocId = randomInt(); - root.nodeType = (byte) 0; - dim2Node1.children = null; - - InMemoryTreeNode dim2Node2 = new InMemoryTreeNode(); - dim2Node2.dimensionId = 2; - dim2Node2.dimensionValue = 4; - dim2Node2.startDocId = 25; - dim2Node2.endDocId = 50; - dim2Node2.childDimensionId = -1; - dim2Node2.aggregatedDocId = randomInt(); - root.nodeType = (byte) 0; - dim2Node2.children = null; - - InMemoryTreeNode dim2Node3 = new InMemoryTreeNode(); - dim2Node3.dimensionId = 2; - dim2Node3.dimensionValue = 5; - dim2Node3.startDocId = 50; - dim2Node3.endDocId = 75; - dim2Node3.childDimensionId = -1; - dim2Node3.aggregatedDocId = randomInt(); - root.nodeType = (byte) 0; - dim2Node3.children = null; - - InMemoryTreeNode dim2Node4 = new InMemoryTreeNode(); - dim2Node4.dimensionId = 2; - dim2Node4.dimensionValue = 6; - dim2Node4.startDocId 
= 75; - dim2Node4.endDocId = 100; - dim2Node4.childDimensionId = -1; - dim2Node4.aggregatedDocId = randomInt(); - root.nodeType = (byte) 0; - dim2Node4.children = null; + // Generate the tree recursively + generateTreeRecursively(root, 1, inMemoryTreeNodeMap); + + return root; + } - dim1Node1.children.put(3L, dim2Node1); - dim1Node1.children.put(4L, dim2Node2); - dim1Node2.children.put(5L, dim2Node3); - dim1Node2.children.put(6L, dim2Node4); + private void generateTreeRecursively(InMemoryTreeNode parent, int currentLevel, Map inMemoryTreeNodeMap) { + if (currentLevel >= this.maxLevels) { + return; // Maximum level reached, stop generating children + } - levelOrderStarTreeNode.put(dim2Node1.dimensionValue, dim2Node1); - levelOrderStarTreeNode.put(dim2Node2.dimensionValue, dim2Node2); - levelOrderStarTreeNode.put(dim2Node3.dimensionValue, dim2Node3); - levelOrderStarTreeNode.put(dim2Node4.dimensionValue, dim2Node4); + int numChildren = randomIntBetween(1, 10); - return root; + for (int i = 0; i < numChildren; i++) { + InMemoryTreeNode child = new InMemoryTreeNode(); + dimensionValue++; + child.dimensionId = currentLevel; + child.dimensionValue = dimensionValue; // Assign a unique dimension value for each child + child.startDocId = randomInt(); + child.endDocId = randomInt(); + child.childDimensionId = (currentLevel == this.maxLevels - 1) ? 
-1 : (currentLevel + 1); + child.aggregatedDocId = randomInt(); + child.nodeType = (byte) 0; + child.children = new HashMap<>(); + + parent.children.put(child.dimensionValue, child); + inMemoryTreeNodeMap.put(child.dimensionValue, child); + + generateTreeRecursively(child, currentLevel + 1, inMemoryTreeNodeMap); + } } public void tearDown() throws Exception { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTest.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java similarity index 97% rename from server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTest.java rename to server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java index bf9621bda1ba7..a1d341615969e 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTest.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeUtilsTests.java @@ -20,7 +20,7 @@ import java.util.List; import java.util.UUID; -public class StarTreeUtilsTest extends OpenSearchTestCase { +public class StarTreeUtilsTests extends OpenSearchTestCase { public void testFullyQualifiedFieldNameForStarTreeDimensionsDocValues() { String starTreeFieldName = "myStarTreeField"; @@ -56,7 +56,7 @@ public void testGetFieldInfo() { } - private void assertFieldInfos(FieldInfo actualFieldInfo, String fieldName, Integer fieldNumber){ + private void assertFieldInfos(FieldInfo actualFieldInfo, String fieldName, Integer fieldNumber) { assertEquals(fieldName, actualFieldInfo.name); assertEquals(fieldNumber, actualFieldInfo.number, 0); assertFalse(actualFieldInfo.hasVectorValues()); @@ -75,5 +75,4 @@ private void assertFieldInfos(FieldInfo actualFieldInfo, String fieldName, Integ assertFalse(actualFieldInfo.isSoftDeletesField()); } - } From 
9117e628339333cfa380a50913a7ba77f6e626f4 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Sun, 25 Aug 2024 17:17:18 +0530 Subject: [PATCH 05/11] adding more tests Signed-off-by: Sarthak Aggarwal --- .../node/FixedLengthStarTreeNode.java | 16 +- .../data/StarTreeFileFormatsTests.java | 53 ++++- .../fileformats/meta/StarTreeMetaTests.java | 6 +- .../node/FixedLengthStarTreeNodeTests.java | 181 ++++++++++++++++++ .../startree/node/StarTreeNodeTypeTests.java | 58 ++++++ .../index/mapper/StarTreeMapperTests.java | 6 + 6 files changed, 306 insertions(+), 14 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java create mode 100644 server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java index f58159535e6e8..df0662ccb1fb1 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java @@ -58,14 +58,14 @@ public class FixedLengthStarTreeNode implements StarTreeNode { * NUM_LONG_SERIALIZABLE_FIELDS) + (NUM_BYTE_SERIALIZABLE_FIELDS * Byte.BYTES); // Byte offsets for each field in the serialized data - private static final int DIMENSION_ID_OFFSET = 0; - private static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES; - private static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; - private static final int END_DOC_ID_OFFSET = START_DOC_ID_OFFSET + Integer.BYTES; - private static final int AGGREGATE_DOC_ID_OFFSET = END_DOC_ID_OFFSET + Integer.BYTES; - private static final int 
STAR_NODE_TYPE_OFFSET = AGGREGATE_DOC_ID_OFFSET + Integer.BYTES; - private static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Byte.BYTES; - private static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; + static final int DIMENSION_ID_OFFSET = 0; + static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES; + static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES; + static final int END_DOC_ID_OFFSET = START_DOC_ID_OFFSET + Integer.BYTES; + static final int AGGREGATE_DOC_ID_OFFSET = END_DOC_ID_OFFSET + Integer.BYTES; + static final int STAR_NODE_TYPE_OFFSET = AGGREGATE_DOC_ID_OFFSET + Integer.BYTES; + static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Byte.BYTES; + static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES; /** * Constant representing an invalid node ID diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java index 0de2fc2966bd1..b63af7f493a3c 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -17,6 +17,7 @@ import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -79,12 +80,18 @@ public void test_StarTreeNode() throws IOException { if (starTreeNode.getChildDimensionId() != -1) { while 
(childrenIterator.hasNext()) { StarTreeNode child = childrenIterator.next(); - assertStarTreeNode( - starTreeNode.getChildForDimensionValue(child.getDimensionValue(), false), - inMemoryTreeNodeMap.get(child.getDimensionValue()) - ); + if (child.getStarTreeNodeType() == StarTreeNodeType.DEFAULT.getValue()) { + assertStarTreeNode( + starTreeNode.getChildForDimensionValue(child.getDimensionValue(), false), + inMemoryTreeNodeMap.get(child.getDimensionValue()) + ); + assertNull(starTreeNode.getChildForDimensionValue(child.getDimensionValue(), true)); + } + queue.add(child); } + } else { + assertTrue(starTreeNode.isLeaf()); } } @@ -92,6 +99,43 @@ public void test_StarTreeNode() throws IOException { } + public void test_starTreeSearch() throws IOException { + + dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); + Map inMemoryTreeNodeMap = new LinkedHashMap<>(); + InMemoryTreeNode root = generateSampleTree(inMemoryTreeNodeMap); + StarTreeWriter starTreeWriter = new StarTreeWriter(); + long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, root, inMemoryTreeNodeMap.size(), "star-tree"); + + // asserting on the actual length of the star tree data file + assertEquals(starTreeDataLength, (inMemoryTreeNodeMap.size() * 33L) + 16); + dataOut.close(); + + dataIn = directory.openInput("star-tree-data", IOContext.READONCE); + + StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); + when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); + when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); + StarTree starTree = new StarTree(dataIn, starTreeMetadata); + + StarTreeNode starTreeNode = starTree.getRoot(); + InMemoryTreeNode inMemoryTreeNode = inMemoryTreeNodeMap.get(starTreeNode.getDimensionValue()); + assertNotNull(inMemoryTreeNode); + + for (int i = 0; i < maxLevels - 1; i++) { + InMemoryTreeNode randomChildNode = randomFrom(inMemoryTreeNode.children.values()); + StarTreeNode randomStarTreeChildNode = 
starTreeNode.getChildForDimensionValue(randomChildNode.dimensionValue, false); + + assertNotNull(randomStarTreeChildNode); + assertStarTreeNode(randomStarTreeChildNode, randomChildNode); + + starTreeNode = randomStarTreeChildNode; + inMemoryTreeNode = randomChildNode; + + } + dataIn.close(); + } + private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode treeNode) throws IOException { assertEquals(starTreeNode.getDimensionId(), treeNode.dimensionId); assertEquals(starTreeNode.getDimensionValue(), treeNode.dimensionValue); @@ -99,6 +143,7 @@ private void assertStarTreeNode(StarTreeNode starTreeNode, InMemoryTreeNode tree assertEquals(starTreeNode.getEndDocId(), treeNode.endDocId); assertEquals(starTreeNode.getChildDimensionId(), treeNode.childDimensionId); assertEquals(starTreeNode.getAggregatedDocId(), treeNode.aggregatedDocId); + assertEquals(starTreeNode.getStarTreeNodeType(), treeNode.nodeType); if (starTreeNode.getChildDimensionId() != -1) { assertFalse(starTreeNode.isLeaf()); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java index 0ebca69e518cb..6cd61d2747959 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java @@ -41,8 +41,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.UUID; import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; @@ -125,7 +125,7 @@ public void test_starTreeMetadata() throws IOException { int maxLeafDocs = randomNonNegativeInt(); StarTreeFieldConfiguration 
starTreeFieldConfiguration = new StarTreeFieldConfiguration( maxLeafDocs, - new HashSet<>(), + Set.of("field10"), StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP ); starTreeField = new StarTreeField("star_tree", dimensionsOrder, metrics, starTreeFieldConfiguration); @@ -168,8 +168,10 @@ public void test_starTreeMetadata() throws IOException { ); StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType); + assertEquals(starTreeField.getName(), starTreeMetadata.getStarTreeFieldName()); assertEquals(starTreeField.getName(), starTreeMetadata.getCompositeFieldName()); assertEquals(STAR_TREE, starTreeMetadata.getCompositeFieldType()); + assertEquals(STAR_TREE.getName(), starTreeMetadata.getStarTreeFieldType()); assertNotNull(starTreeMetadata); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java new file mode 100644 index 0000000000000..0e8dcdec914b5 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java @@ -0,0 +1,181 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class FixedLengthStarTreeNodeTests extends OpenSearchTestCase { + + private IndexOutput dataOut; + private IndexInput dataIn; + private Directory directory; + InMemoryTreeNode node; + InMemoryTreeNode starChild; + FixedLengthStarTreeNode starTreeNode; + + @Before + public void setup() throws IOException { + directory = newFSDirectory(createTempDir()); + + dataOut = directory.createOutput("star-tree-data", IOContext.DEFAULT); + StarTreeWriter starTreeWriter = new StarTreeWriter(); + + node = new InMemoryTreeNode(); + node.dimensionId = 0; + node.startDocId = randomInt(); + node.endDocId = randomInt(); + node.childDimensionId = 1; + node.aggregatedDocId = randomInt(); + node.nodeType = randomFrom((byte) 0, (byte) -1, (byte) 2); + node.children = new HashMap<>(); + + starChild = new InMemoryTreeNode(); + starChild.dimensionId = node.dimensionId + 1; + starChild.dimensionValue = -1; + starChild.startDocId = randomInt(); + starChild.endDocId = randomInt(); + starChild.childDimensionId = -1; + starChild.aggregatedDocId = randomInt(); + starChild.nodeType = (byte) -2; + starChild.children = new HashMap<>(); + node.children.put(-1L, starChild); + + for (int i = 1; i < randomIntBetween(2, 5); i++) { + 
InMemoryTreeNode child = new InMemoryTreeNode(); + child.dimensionId = node.dimensionId + 1; + child.dimensionValue = node.dimensionValue + i; // Assign a unique dimension value for each child + child.startDocId = randomInt(); + child.endDocId = randomInt(); + child.childDimensionId = -1; + child.aggregatedDocId = randomInt(); + child.nodeType = (byte) 0; + child.children = new HashMap<>(); + node.children.put(child.dimensionValue, child); + } + + long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, node, 1 + node.children.size(), "star-tree"); + + // asserting on the actual length of the star tree data file + assertEquals(starTreeDataLength, 33L * node.children.size() + 33 + 16); + dataOut.close(); + + dataIn = directory.openInput("star-tree-data", IOContext.READONCE); + StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); + when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); + when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); + StarTree starTree = new StarTree(dataIn, starTreeMetadata); + + starTreeNode = (FixedLengthStarTreeNode) starTree.getRoot(); + + } + + public void testOffsets() { + assertEquals(0, FixedLengthStarTreeNode.DIMENSION_ID_OFFSET); + assertEquals(4, FixedLengthStarTreeNode.DIMENSION_VALUE_OFFSET); + assertEquals(12, FixedLengthStarTreeNode.START_DOC_ID_OFFSET); + assertEquals(16, FixedLengthStarTreeNode.END_DOC_ID_OFFSET); + assertEquals(20, FixedLengthStarTreeNode.AGGREGATE_DOC_ID_OFFSET); + assertEquals(24, FixedLengthStarTreeNode.STAR_NODE_TYPE_OFFSET); + assertEquals(25, FixedLengthStarTreeNode.FIRST_CHILD_ID_OFFSET); + assertEquals(29, FixedLengthStarTreeNode.LAST_CHILD_ID_OFFSET); + } + + public void testSerializableDataSize() { + assertEquals(33, FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES); + } + + public void testGetDimensionId() throws IOException { + assertEquals(node.dimensionId, starTreeNode.getDimensionId()); + } + + public void testGetDimensionValue() throws 
IOException { + assertEquals(node.dimensionValue, starTreeNode.getDimensionValue()); + } + + public void testGetStartDocId() throws IOException { + assertEquals(node.startDocId, starTreeNode.getStartDocId()); + } + + public void testGetEndDocId() throws IOException { + assertEquals(node.endDocId, starTreeNode.getEndDocId()); + } + + public void testGetAggregatedDocId() throws IOException { + assertEquals(node.aggregatedDocId, starTreeNode.getAggregatedDocId()); + } + + public void testGetNumChildren() throws IOException { + assertEquals(node.children.size(), starTreeNode.getNumChildren()); + } + + public void testIsLeaf() { + assertFalse(starTreeNode.isLeaf()); + } + + public void testGetStarTreeNodeType() throws IOException { + assertEquals(node.getNodeType(), starTreeNode.getStarTreeNodeType()); + } + + public void testGetChildForDimensionValue() throws IOException { + long dimensionValue = randomIntBetween(0, node.children.size() - 2); + FixedLengthStarTreeNode childNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue(dimensionValue, false); + assertNotNull(childNode); + assertEquals(dimensionValue, childNode.getDimensionValue()); + } + + public void testGetChildrenIterator() throws IOException { + Iterator iterator = starTreeNode.getChildrenIterator(); + int count = 0; + while (iterator.hasNext()) { + FixedLengthStarTreeNode child = iterator.next(); + assertNotNull(child); + count++; + } + assertEquals(starTreeNode.getNumChildren(), count); + } + + public void testGetChildForStarNode() throws IOException { + // Assuming the first child is a star node in our test data + FixedLengthStarTreeNode starNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue((long) StarTreeUtils.ALL, true); + assertNotNull(starNode); + assertEquals(StarTreeUtils.ALL, starNode.getDimensionValue()); + } + + public void testGetChildForNullNode() throws IOException { + FixedLengthStarTreeNode nullNode = (FixedLengthStarTreeNode) 
starTreeNode.getChildForDimensionValue(null, false); + assertNull(nullNode); + } + + public void testGetChildForInvalidDimensionValue() throws IOException { + long invalidDimensionValue = Long.MAX_VALUE; + assertThrows(AssertionError.class, () -> starTreeNode.getChildForDimensionValue(invalidDimensionValue, false)); + } + + public void tearDown() throws Exception { + super.tearDown(); + dataIn.close(); + dataOut.close(); + directory.close(); + } +} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java new file mode 100644 index 0000000000000..81fb620da5af3 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeTypeTests.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.opensearch.test.OpenSearchTestCase; + +public class StarTreeNodeTypeTests extends OpenSearchTestCase { + + public void testStarNodeType() { + assertEquals("star", StarTreeNodeType.STAR.getName()); + assertEquals((byte) -2, StarTreeNodeType.STAR.getValue()); + } + + public void testNullNodeType() { + assertEquals("null", StarTreeNodeType.NULL.getName()); + assertEquals((byte) -1, StarTreeNodeType.NULL.getValue()); + } + + public void testDefaultNodeType() { + assertEquals("default", StarTreeNodeType.DEFAULT.getName()); + assertEquals((byte) 0, StarTreeNodeType.DEFAULT.getValue()); + } + + public void testFromValue() { + assertEquals(StarTreeNodeType.STAR, StarTreeNodeType.fromValue((byte) -2)); + assertEquals(StarTreeNodeType.NULL, StarTreeNodeType.fromValue((byte) -1)); + assertEquals(StarTreeNodeType.DEFAULT, StarTreeNodeType.fromValue((byte) 0)); + } + + public void testFromValueInvalid() { + IllegalStateException exception = expectThrows(IllegalStateException.class, () -> StarTreeNodeType.fromValue((byte) 1)); + assertEquals("Unrecognized value byte to determine star-tree node type: [1]", exception.getMessage()); + } + + public void testEnumValues() { + StarTreeNodeType[] values = StarTreeNodeType.values(); + assertEquals(3, values.length); + assertArrayEquals(new StarTreeNodeType[] { StarTreeNodeType.STAR, StarTreeNodeType.NULL, StarTreeNodeType.DEFAULT }, values); + } + + public void testEnumValueOf() { + assertEquals(StarTreeNodeType.STAR, StarTreeNodeType.valueOf("STAR")); + assertEquals(StarTreeNodeType.NULL, StarTreeNodeType.valueOf("NULL")); + assertEquals(StarTreeNodeType.DEFAULT, StarTreeNodeType.valueOf("DEFAULT")); + } + + public void testEnumValueOfInvalid() { + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> StarTreeNodeType.valueOf("INVALID")); + assertTrue(exception.getMessage().contains("No enum constant")); + } + +} 
diff --git a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java index 449b251dddca1..b0145dbde4bee 100644 --- a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java @@ -21,6 +21,7 @@ import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.ReadDimension; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.junit.After; @@ -332,6 +333,11 @@ public void testDimensions() { assertEquals(n1, n2); n2 = new NumericDimension("name1"); assertNotEquals(n1, n2); + ReadDimension r1 = new ReadDimension("name"); + ReadDimension r2 = new ReadDimension("name"); + assertEquals(r1, r2); + r2 = new ReadDimension("name1"); + assertNotEquals(r1, r2); } public void testStarTreeField() { From 8e11b30901e377ee61d8d9bdb253dba5c8cd2202 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Sun, 25 Aug 2024 17:43:05 +0530 Subject: [PATCH 06/11] removing data header Signed-off-by: Sarthak Aggarwal --- .../startree/fileformats/StarTreeWriter.java | 5 +- .../fileformats/data/StarTreeDataWriter.java | 34 +------------- .../fileformats/meta/StarTreeMetaWriter.java | 9 +++- .../fileformats/meta/StarTreeMetadata.java | 46 ++++++++++++++++++- .../datacube/startree/node/StarTree.java | 34 +------------- .../data/StarTreeFileFormatsTests.java | 4 +- .../fileformats/meta/StarTreeMetaTests.java | 7 ++- .../node/FixedLengthStarTreeNodeTests.java | 2 +- 8 files changed, 66 insertions(+), 75 deletions(-) diff --git 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java index 19dcc16e7f95f..7f1839024eea7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java @@ -37,7 +37,7 @@ public StarTreeWriter() {} * * @param dataOut data index output * @param rootNode root star-tree node - * @param numNodes number of nodes in the tree + * @param numNodes number of nodes in the star tree * @param name name of the star-tree field * @return total size of the three * @throws IOException when star-tree data serialization fails @@ -52,6 +52,7 @@ public long writeStarTree(IndexOutput dataOut, InMemoryTreeNode rootNode, int nu * @param metaOut meta index output * @param starTreeField star tree field * @param metricAggregatorInfos metric aggregator infos + * @param numNodes number of nodes in the star tree * @param segmentAggregatedCount segment aggregated count * @param dataFilePointer data file pointer * @param dataFileLength data file length @@ -61,6 +62,7 @@ public void writeStarTreeMetadata( IndexOutput metaOut, StarTreeField starTreeField, List metricAggregatorInfos, + Integer numNodes, Integer segmentAggregatedCount, long dataFilePointer, long dataFileLength @@ -69,6 +71,7 @@ public void writeStarTreeMetadata( metaOut, starTreeField, metricAggregatorInfos, + numNodes, segmentAggregatedCount, dataFilePointer, dataFileLength diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java index 5ac47e5927d4f..6f11148dee468 100644 --- 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java @@ -20,8 +20,6 @@ import java.util.List; import java.util.Queue; -import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; -import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; import static org.opensearch.index.compositeindex.datacube.startree.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; @@ -45,44 +43,14 @@ public class StarTreeDataWriter { * @throws IOException if an I/O error occurs while writing the star-tree data */ public static long writeStarTree(IndexOutput indexOutput, InMemoryTreeNode rootNode, int numNodes, String name) throws IOException { - long totalSizeInBytes = 0L; - totalSizeInBytes += computeStarTreeDataHeaderByteSize(); - totalSizeInBytes += (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; + long totalSizeInBytes = (long) numNodes * SERIALIZABLE_DATA_SIZE_IN_BYTES; logger.debug("Star tree data size in bytes : {} for star-tree field {}", totalSizeInBytes, name); - writeStarTreeHeader(indexOutput, numNodes); writeStarTreeNodes(indexOutput, rootNode); return totalSizeInBytes; } - /** - * Computes the byte size of the star-tree data header. - * - * @return the byte size of the star-tree data header - */ - public static int computeStarTreeDataHeaderByteSize() { - // Magic marker (8), version (4) - int headerSizeInBytes = 12; - - // For number of nodes. - headerSizeInBytes += Integer.BYTES; - return headerSizeInBytes; - } - - /** - * Writes the star-tree data header. 
- * - * @param output the IndexOutput to write the header - * @param numNodes the total number of nodes in the star-tree - * @throws IOException if an I/O error occurs while writing the header - */ - private static void writeStarTreeHeader(IndexOutput output, int numNodes) throws IOException { - output.writeLong(COMPOSITE_FIELD_MARKER); - output.writeInt(VERSION_CURRENT); - output.writeInt(numNodes); - } - /** * Writes the star-tree nodes in a breadth-first order. * diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java index 057d5e908d9d6..f7ff91f5928af 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java @@ -40,6 +40,7 @@ public class StarTreeMetaWriter { * @param starTreeField the star-tree field * @param metricAggregatorInfos the list of metric aggregator information * @param segmentAggregatedCount the aggregated document count for the segment + * @param numNodes number of nodes in the star tree * @param dataFilePointer the file pointer to the start of the star tree data * @param dataFileLength the length of the star tree data file * @throws IOException if an I/O error occurs while serializing the metadata @@ -48,6 +49,7 @@ public static void writeStarTreeMetadata( IndexOutput metaOut, StarTreeField starTreeField, List metricAggregatorInfos, + Integer numNodes, Integer segmentAggregatedCount, long dataFilePointer, long dataFileLength @@ -56,7 +58,7 @@ public static void writeStarTreeMetadata( long initialMetaFilePointer = metaOut.getFilePointer(); writeMetaHeader(metaOut, CompositeMappedFieldType.CompositeFieldType.STAR_TREE, starTreeField.getName()); - writeMeta(metaOut, 
metricAggregatorInfos, starTreeField, segmentAggregatedCount, dataFilePointer, dataFileLength); + writeMeta(metaOut, metricAggregatorInfos, starTreeField, numNodes, segmentAggregatedCount, dataFilePointer, dataFileLength); logger.debug( "Star tree meta size in bytes : {} for star-tree field {}", @@ -97,6 +99,7 @@ private static void writeMetaHeader( * @param metaOut the IndexOutput to write the metadata * @param metricAggregatorInfos the list of metric aggregator information * @param starTreeField the star tree field + * @param numNodes number of nodes in the star tree * @param segmentAggregatedDocCount the aggregated document count for the segment * @param dataFilePointer the file pointer to the start of the star-tree data * @param dataFileLength the length of the star-tree data file @@ -106,11 +109,15 @@ private static void writeMeta( IndexOutput metaOut, List metricAggregatorInfos, StarTreeField starTreeField, + int numNodes, Integer segmentAggregatedDocCount, long dataFilePointer, long dataFileLength ) throws IOException { + // number of nodes + metaOut.writeInt(numNodes); + // number of dimensions metaOut.writeVInt(starTreeField.getDimensionsOrder().size()); diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java index a8a5e3c1b03b1..e8d24cf8f6209 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -35,6 +35,16 @@ public class StarTreeMetadata extends CompositeIndexMetadata { */ private final IndexInput meta; + /** + * The version of the star tree stored in the segments. 
+ */ + private final int version; + + /** + * The number of the nodes in the respective star tree + */ + private final int numberOfNodes; + /** * The name of the star-tree field, used to identify the star-tree. */ @@ -92,15 +102,22 @@ public class StarTreeMetadata extends CompositeIndexMetadata { * @param metaIn an index input to read star-tree meta * @param compositeFieldName name of the composite field. Here, name of the star-tree field. * @param compositeFieldType type of the composite field. Here, STAR_TREE field. + * @param version The version of the star tree stored in the segments. * @throws IOException if unable to read star-tree metadata from the file */ - public StarTreeMetadata(IndexInput metaIn, String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType) - throws IOException { + public StarTreeMetadata( + IndexInput metaIn, + String compositeFieldName, + CompositeMappedFieldType.CompositeFieldType compositeFieldType, + Integer version + ) throws IOException { super(compositeFieldName, compositeFieldType); this.meta = metaIn; try { this.starTreeFieldName = this.getCompositeFieldName(); this.starTreeFieldType = this.getCompositeFieldType().getName(); + this.version = version; + this.numberOfNodes = readNumberOfNodes(); this.dimensionFields = readStarTreeDimensions(); this.metricEntries = readMetricEntries(); this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); @@ -122,6 +139,7 @@ public StarTreeMetadata(IndexInput metaIn, String compositeFieldName, CompositeM * @param meta an index input to read star-tree meta * @param compositeFieldName name of the composite field. Here, name of the star-tree field. * @param compositeFieldType type of the composite field. Here, STAR_TREE field. + * @param version The version of the star tree stored in the segments. 
* @param dimensionFields list of dimension fields * @param metricEntries list of metric entries * @param segmentAggregatedDocCount segment aggregated doc count @@ -135,6 +153,8 @@ public StarTreeMetadata( String compositeFieldName, CompositeMappedFieldType.CompositeFieldType compositeFieldType, IndexInput meta, + Integer version, + Integer numberOfNodes, List dimensionFields, List metricEntries, Integer segmentAggregatedDocCount, @@ -148,6 +168,8 @@ public StarTreeMetadata( this.meta = meta; this.starTreeFieldName = compositeFieldName; this.starTreeFieldType = compositeFieldType.getName(); + this.version = version; + this.numberOfNodes = numberOfNodes; this.dimensionFields = dimensionFields; this.metricEntries = metricEntries; this.segmentAggregatedDocCount = segmentAggregatedDocCount; @@ -158,6 +180,10 @@ public StarTreeMetadata( this.dataLength = dataLength; } + private int readNumberOfNodes() throws IOException { + return meta.readInt(); + } + private int readDimensionsCount() throws IOException { return meta.readVInt(); } @@ -313,4 +339,20 @@ public long getDataStartFilePointer() { public long getDataLength() { return dataLength; } + + /** + * Returns the version with which the star tree is stored in the segments + * @return star-tree version + */ + public int getVersion() { + return version; + } + + /** + * Returns the number of nodes in the star tree + * @return number of nodes in the star tree + */ + public int getNumberOfNodes() { + return numberOfNodes; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java index 4ed3c3ec9febe..93d0cc93e8843 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java @@ -7,45 +7,22 @@ */ package 
org.opensearch.index.compositeindex.datacube.startree.node; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RandomAccessInput; -import org.opensearch.index.compositeindex.datacube.startree.fileformats.data.StarTreeDataWriter; import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; import java.io.IOException; -import static org.opensearch.index.compositeindex.CompositeIndexConstants.COMPOSITE_FIELD_MARKER; -import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; - /** * Off heap implementation of the star-tree. * * @opensearch.experimental */ public class StarTree { - private static final Logger logger = LogManager.getLogger(StarTree.class); private final FixedLengthStarTreeNode root; - private final Integer numNodes; public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { - long magicMarker = data.readLong(); - if (COMPOSITE_FIELD_MARKER != magicMarker) { - logger.error("Invalid magic marker"); - throw new IOException("Invalid magic marker"); - } - int version = data.readInt(); - if (VERSION_CURRENT != version) { - logger.error("Invalid star tree version"); - throw new IOException("Invalid version"); - } - numNodes = data.readInt(); // num nodes - - RandomAccessInput in = data.randomAccessSlice( - StarTreeDataWriter.computeStarTreeDataHeaderByteSize(), - starTreeMetadata.getDataLength() - StarTreeDataWriter.computeStarTreeDataHeaderByteSize() - ); + RandomAccessInput in = data.randomAccessSlice(0, starTreeMetadata.getDataLength()); root = new FixedLengthStarTreeNode(in, 0); } @@ -53,13 +30,4 @@ public StarTreeNode getRoot() { return root; } - /** - * Returns the number of nodes in star-tree - * - * @return number of nodes in te star-tree - */ - public Integer getNumNodes() { - return numNodes; - } - } diff --git 
a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java index b63af7f493a3c..5f7a48f8f731c 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -56,7 +56,7 @@ public void test_StarTreeNode() throws IOException { long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, root, inMemoryTreeNodeMap.size(), "star-tree"); // asserting on the actual length of the star tree data file - assertEquals(starTreeDataLength, (inMemoryTreeNodeMap.size() * 33L) + 16); + assertEquals(starTreeDataLength, (inMemoryTreeNodeMap.size() * 33L)); dataOut.close(); dataIn = directory.openInput("star-tree-data", IOContext.READONCE); @@ -108,7 +108,7 @@ public void test_starTreeSearch() throws IOException { long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, root, inMemoryTreeNodeMap.size(), "star-tree"); // asserting on the actual length of the star tree data file - assertEquals(starTreeDataLength, (inMemoryTreeNodeMap.size() * 33L) + 16); + assertEquals(starTreeDataLength, (inMemoryTreeNodeMap.size() * 33L)); dataOut.close(); dataIn = directory.openInput("star-tree-data", IOContext.READONCE); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java index 6cd61d2747959..b3845e2d31e2f 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java +++ 
b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java @@ -147,10 +147,12 @@ public void test_starTreeMetadata() throws IOException { segmentDocumentCount = randomNonNegativeInt(); metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT); StarTreeWriter starTreeWriter = new StarTreeWriter(); + int numberOfNodes = randomNonNegativeInt(); starTreeWriter.writeStarTreeMetadata( metaOut, starTreeField, metricAggregatorInfos, + numberOfNodes, segmentDocumentCount, dataFilePointer, dataFileLength @@ -167,12 +169,13 @@ public void test_starTreeMetadata() throws IOException { metaIn.readString() ); - StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType); + StarTreeMetadata starTreeMetadata = new StarTreeMetadata(metaIn, compositeFieldName, compositeFieldType, VERSION_CURRENT); assertEquals(starTreeField.getName(), starTreeMetadata.getStarTreeFieldName()); assertEquals(starTreeField.getName(), starTreeMetadata.getCompositeFieldName()); assertEquals(STAR_TREE, starTreeMetadata.getCompositeFieldType()); assertEquals(STAR_TREE.getName(), starTreeMetadata.getStarTreeFieldType()); - + assertEquals(starTreeMetadata.getVersion(), VERSION_CURRENT); + assertEquals(starTreeMetadata.getNumberOfNodes(), numberOfNodes); assertNotNull(starTreeMetadata); for (int i = 0; i < dimensionsOrder.size(); i++) { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java index 0e8dcdec914b5..61163a37582ad 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java @@ -77,7 +77,7 @@ public void setup() 
throws IOException { long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, node, 1 + node.children.size(), "star-tree"); // asserting on the actual length of the star tree data file - assertEquals(starTreeDataLength, 33L * node.children.size() + 33 + 16); + assertEquals(starTreeDataLength, 33L * node.children.size() + 33); dataOut.close(); dataIn = directory.openInput("star-tree-data", IOContext.READONCE); From e74b90d7b25a66cba24086d3aa3aade9b8b65e90 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Tue, 27 Aug 2024 12:52:10 +0530 Subject: [PATCH 07/11] separated functions for star node, removed metric entry and tree node Signed-off-by: Sarthak Aggarwal --- .../startree/builder/BaseStarTreeBuilder.java | 43 ++++++------ .../fileformats/data/StarTreeDataWriter.java | 2 +- .../fileformats/meta/MetricEntry.java | 55 --------------- .../fileformats/meta/StarTreeMetaWriter.java | 2 + .../fileformats/meta/StarTreeMetadata.java | 31 +++++---- .../node/FixedLengthStarTreeNode.java | 19 ++--- .../fileformats/node/package-info.java | 12 ++++ .../datacube/startree/node/StarTree.java | 1 + .../datacube/startree/node/StarTreeNode.java | 12 +++- .../datacube/startree/utils/TreeNode.java | 69 ------------------- .../builder/AbstractStarTreeBuilderTests.java | 45 +++++++----- .../data/StarTreeFileFormatsTests.java | 6 +- .../fileformats/meta/StarTreeMetaTests.java | 17 +++-- .../node/FixedLengthStarTreeNodeTests.java | 12 ++-- 14 files changed, 125 insertions(+), 201 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/MetricEntry.java rename server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/{ => fileformats}/node/FixedLengthStarTreeNode.java (94%) create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/package-info.java delete mode 100644 
server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java rename server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/{ => fileformats}/node/FixedLengthStarTreeNodeTests.java (93%) diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index ddcf02cc6291a..d4bb1cc3dc1f7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -26,8 +26,9 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo; import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.Mapper; @@ -46,7 +47,7 @@ import java.util.Objects; import java.util.Set; -import static org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode.ALL; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; /** * Builder for star tree. 
Defines the algorithm to construct star-tree @@ -72,7 +73,7 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder { protected int numStarTreeNodes; protected final int maxLeafDocuments; - protected final TreeNode rootNode = getNewNode(); + protected final InMemoryTreeNode rootNode = getNewNode(); protected final StarTreeField starTreeField; private final SegmentWriteState state; @@ -578,9 +579,9 @@ private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOExcept * * @return return new star-tree node */ - private TreeNode getNewNode() { + private InMemoryTreeNode getNewNode() { numStarTreeNodes++; - return new TreeNode(); + return new InMemoryTreeNode(); } /** @@ -591,7 +592,7 @@ private TreeNode getNewNode() { * @param endDocId end document id * @throws IOException throws an exception if we are unable to construct the tree */ - private void constructStarTree(TreeNode node, int startDocId, int endDocId) throws IOException { + private void constructStarTree(InMemoryTreeNode node, int startDocId, int endDocId) throws IOException { int childDimensionId = node.dimensionId + 1; if (childDimensionId == numDimensions) { @@ -600,7 +601,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro // Construct all non-star children nodes node.childDimensionId = childDimensionId; - Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); + Map children = constructNonStarNodes(startDocId, endDocId, childDimensionId); node.children = children; // Construct star-node if required @@ -609,7 +610,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro } // Further split on child nodes if required - for (TreeNode child : children.values()) { + for (InMemoryTreeNode child : children.values()) { if (child.endDocId - child.startDocId > maxLeafDocuments) { constructStarTree(child, child.startDocId, child.endDocId); } @@ -625,14 +626,14 @@ private void constructStarTree(TreeNode 
node, int startDocId, int endDocId) thro * @return root node with non-star nodes constructed * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { - Map nodes = new HashMap<>(); + private Map constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException { + Map nodes = new HashMap<>(); int nodeStartDocId = startDocId; Long nodeDimensionValue = getDimensionValue(startDocId, dimensionId); for (int i = startDocId + 1; i < endDocId; i++) { Long dimensionValue = getDimensionValue(i, dimensionId); if (Objects.equals(dimensionValue, nodeDimensionValue) == false) { - TreeNode child = getNewNode(); + InMemoryTreeNode child = getNewNode(); child.dimensionId = dimensionId; child.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL; child.startDocId = nodeStartDocId; @@ -643,7 +644,7 @@ private Map constructNonStarNodes(int startDocId, int endDocId, nodeDimensionValue = dimensionValue; } } - TreeNode lastNode = getNewNode(); + InMemoryTreeNode lastNode = getNewNode(); lastNode.dimensionId = dimensionId; lastNode.dimensionValue = nodeDimensionValue != null ? 
nodeDimensionValue : ALL; lastNode.startDocId = nodeStartDocId; @@ -661,11 +662,11 @@ private Map constructNonStarNodes(int startDocId, int endDocId, * @return root node with star nodes constructed * @throws IOException throws an exception if we are unable to construct non-star nodes */ - private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { - TreeNode starNode = getNewNode(); + private InMemoryTreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException { + InMemoryTreeNode starNode = getNewNode(); starNode.dimensionId = dimensionId; starNode.dimensionValue = ALL; - starNode.isStarNode = true; + starNode.nodeType = StarTreeNodeType.STAR.getValue(); starNode.startDocId = numStarTreeDocs; Iterator starTreeDocumentIterator = generateStarTreeDocumentsForStarNode(startDocId, endDocId, dimensionId); while (starTreeDocumentIterator.hasNext()) { @@ -682,7 +683,7 @@ private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId * @return aggregated star-tree documents * @throws IOException throws an exception upon failing to create new aggregated docs based on star tree */ - private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException { + private StarTreeDocument createAggregatedDocs(InMemoryTreeNode node) throws IOException { StarTreeDocument aggregatedStarTreeDocument = null; if (node.children == null) { @@ -709,8 +710,8 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException // For non-leaf node if (node.children.containsKey((long) ALL)) { // If it has star child, use the star child aggregated document directly - for (TreeNode child : node.children.values()) { - if (child.isStarNode) { + for (InMemoryTreeNode child : node.children.values()) { + if (child.nodeType == StarTreeNodeType.STAR.getValue()) { aggregatedStarTreeDocument = createAggregatedDocs(child); node.aggregatedDocId = child.aggregatedDocId; } else { @@ -720,12 
+721,12 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException } else { // If no star child exists, aggregate all aggregated documents from non-star children if (node.children.values().size() == 1) { - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); node.aggregatedDocId = child.aggregatedDocId; } } else { - for (TreeNode child : node.children.values()) { + for (InMemoryTreeNode child : node.children.values()) { aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child)); } if (null == aggregatedStarTreeDocument) { @@ -760,7 +761,7 @@ public void close() throws IOException { abstract Iterator mergeStarTrees(List starTreeValues) throws IOException; - public TreeNode getRootNode() { + public InMemoryTreeNode getRootNode() { return rootNode; } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java index 6f11148dee468..32feb78a4db3d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeDataWriter.java @@ -20,7 +20,7 @@ import java.util.List; import java.util.Queue; -import static org.opensearch.index.compositeindex.datacube.startree.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; +import static org.opensearch.index.compositeindex.datacube.startree.fileformats.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES; import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; /** diff --git 
a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/MetricEntry.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/MetricEntry.java deleted file mode 100644 index 357c8a49f600c..0000000000000 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/MetricEntry.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.compositeindex.datacube.startree.fileformats.meta; - -import org.opensearch.index.compositeindex.datacube.MetricStat; - -import java.util.Objects; - -/** - * Holds the pair of metric name and it's associated stat - * - * @opensearch.experimental - */ -public class MetricEntry { - - private final String metricFieldName; - private final MetricStat metricStat; - - public MetricEntry(String metricFieldName, MetricStat metricStat) { - this.metricFieldName = metricFieldName; - this.metricStat = metricStat; - } - - public String getMetricFieldName() { - return metricFieldName; - } - - public MetricStat getMetricStat() { - return metricStat; - } - - @Override - public int hashCode() { - return Objects.hashCode(metricFieldName + metricStat.getTypeName()); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj instanceof MetricEntry) { - MetricEntry anotherPair = (MetricEntry) obj; - return metricStat.equals(anotherPair.metricStat) && metricFieldName.equals(anotherPair.metricFieldName); - } - return false; - } - -} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java index 
f7ff91f5928af..112e47d50fde6 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java @@ -119,9 +119,11 @@ private static void writeMeta( metaOut.writeInt(numNodes); // number of dimensions + // TODO: Revisit the number of dimensions for timestamps (as we will split timestamp into min, hour, etc.) metaOut.writeVInt(starTreeField.getDimensionsOrder().size()); // dimensions + // TODO: Add sub-dimensions for timestamps (as we will split timestamp into min, hour, etc.) for (Dimension dimension : starTreeField.getDimensionsOrder()) { metaOut.writeString(dimension.getField()); } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java index e8d24cf8f6209..7519c85562a8c 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadata.java @@ -12,6 +12,7 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.store.IndexInput; import org.opensearch.index.compositeindex.CompositeIndexMetadata; +import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; import org.opensearch.index.mapper.CompositeMappedFieldType; @@ -19,7 +20,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Set; /** @@ -62,9 +65,9 @@ public class 
StarTreeMetadata extends CompositeIndexMetadata { private final List dimensionFields; /** - * List of metric entries, containing field names and associated metric statistic. + * List of metrics, containing field names and associated metric statistics. */ - private final List metricEntries; + private final List metrics; /** * The total number of documents aggregated in this star-tree segment. @@ -119,7 +122,7 @@ public StarTreeMetadata( this.version = version; this.numberOfNodes = readNumberOfNodes(); this.dimensionFields = readStarTreeDimensions(); - this.metricEntries = readMetricEntries(); + this.metrics = readMetricEntries(); this.segmentAggregatedDocCount = readSegmentAggregatedDocCount(); this.maxLeafDocs = readMaxLeafDocs(); this.skipStarNodeCreationInDims = readSkipStarNodeCreationInDims(); @@ -141,7 +144,7 @@ public StarTreeMetadata( * @param compositeFieldType type of the composite field. Here, STAR_TREE field. * @param version The version of the star tree stored in the segments. 
* @param dimensionFields list of dimension fields - * @param metricEntries list of metric entries + * @param metrics list of metric entries * @param segmentAggregatedDocCount segment aggregated doc count * @param maxLeafDocs max leaf docs * @param skipStarNodeCreationInDims set of dimensions to skip star node creation @@ -156,7 +159,7 @@ public StarTreeMetadata( Integer version, Integer numberOfNodes, List dimensionFields, - List metricEntries, + List metrics, Integer segmentAggregatedDocCount, Integer maxLeafDocs, Set skipStarNodeCreationInDims, @@ -171,7 +174,7 @@ public StarTreeMetadata( this.version = version; this.numberOfNodes = numberOfNodes; this.dimensionFields = dimensionFields; - this.metricEntries = metricEntries; + this.metrics = metrics; this.segmentAggregatedDocCount = segmentAggregatedDocCount; this.maxLeafDocs = maxLeafDocs; this.skipStarNodeCreationInDims = skipStarNodeCreationInDims; @@ -203,17 +206,19 @@ private int readMetricsCount() throws IOException { return meta.readVInt(); } - private List readMetricEntries() throws IOException { + private List readMetricEntries() throws IOException { int metricCount = readMetricsCount(); - List metricEntries = new ArrayList<>(); + Map starTreeMetricMap = new LinkedHashMap<>(); for (int i = 0; i < metricCount; i++) { - String metricFieldName = meta.readString(); + String metricName = meta.readString(); int metricStatOrdinal = meta.readVInt(); - metricEntries.add(new MetricEntry(metricFieldName, MetricStat.fromMetricOrdinal(metricStatOrdinal))); + MetricStat metricStat = MetricStat.fromMetricOrdinal(metricStatOrdinal); + Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>())); + metric.getMetrics().add(metricStat); } - return metricEntries; + return new ArrayList<>(starTreeMetricMap.values()); } private int readSegmentAggregatedDocCount() throws IOException { @@ -282,8 +287,8 @@ public List getDimensionFields() { * * @return star-tree metric entries */ - 
public List getMetricEntries() { - return metricEntries; + public List getMetrics() { + return metrics; } /** diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java similarity index 94% rename from server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java rename to server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java index df0662ccb1fb1..f1663de7087c4 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java @@ -5,16 +5,16 @@ * this file be licensed under the Apache-2.0 license or a * compatible open source license. */ -package org.opensearch.index.compositeindex.datacube.startree.node; +package org.opensearch.index.compositeindex.datacube.startree.fileformats.node; import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import java.io.IOException; import java.io.UncheckedIOException; import java.util.Iterator; -import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL; - /** * Fixed Length implementation of {@link StarTreeNode}. *

@@ -187,16 +187,17 @@ public byte getStarTreeNodeType() throws IOException { } @Override - public StarTreeNode getChildForDimensionValue(Long dimensionValue, boolean isStar) throws IOException { + public StarTreeNode getChildStarNode() throws IOException { + return handleStarNode(); + } + + @Override + public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOException { // there will be no children for leaf nodes if (isLeaf()) { return null; } - // Specialize star node for performance - if (isStar) { - return handleStarNode(); - } StarTreeNode resultStarTreeNode = null; if (null != dimensionValue) { resultStarTreeNode = binarySearchChild(dimensionValue); @@ -213,7 +214,7 @@ public StarTreeNode getChildForDimensionValue(Long dimensionValue, boolean isSta */ private FixedLengthStarTreeNode handleStarNode() throws IOException { FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId); - if (firstNode.getDimensionValue() == ALL) { + if (firstNode.getStarTreeNodeType() == StarTreeNodeType.STAR.getValue()) { return firstNode; } else { return null; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/package-info.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/package-info.java new file mode 100644 index 0000000000000..84271be81f5e4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +/** + * Holds classes associated with star tree node with file formats + */ +package org.opensearch.index.compositeindex.datacube.startree.fileformats.node; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java index 93d0cc93e8843..828d98368b9e6 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java @@ -10,6 +10,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RandomAccessInput; import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.node.FixedLengthStarTreeNode; import java.io.IOException; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java index d29d9145853ad..fce3e30e9ebf6 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNode.java @@ -101,13 +101,21 @@ public interface StarTreeNode { byte getStarTreeNodeType() throws IOException; /** - * Returns the child star-tree node for the given dimension value. + * Returns the child node for the given dimension value in the star-tree. 
* * @param dimensionValue the dimension value * @return the child node for the given dimension value or null if child is not present * @throws IOException if an I/O error occurs while retrieving the child node */ - StarTreeNode getChildForDimensionValue(Long dimensionValue, boolean isStar) throws IOException; + StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOException; + + /** + * Returns the child star node for a node in the star-tree. + * + * @return the child star node, or null if the star child node is not present + * @throws IOException if an I/O error occurs while retrieving the child node + */ + StarTreeNode getChildStarNode() throws IOException; /** * Returns an iterator over the children of the current star-tree node. diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java deleted file mode 100644 index a5d59a2602633..0000000000000 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/TreeNode.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.index.compositeindex.datacube.startree.utils; - -import org.opensearch.common.annotation.ExperimentalApi; - -import java.util.Map; - -/** - * /** - * Represents a node in a tree data structure, specifically designed for a star-tree implementation. - * A star-tree node will represent both star and non-star nodes. - * - * @opensearch.experimental - */ -@ExperimentalApi -public class TreeNode { - - public static final int ALL = -1; - - /** - * The dimension id for the dimension (field) associated with this star-tree node. 
- */ - public int dimensionId = ALL; - - /** - * The starting document id (inclusive) associated with this star-tree node. - */ - public int startDocId = ALL; - - /** - * The ending document id (exclusive) associated with this star-tree node. - */ - public int endDocId = ALL; - - /** - * The aggregated document id associated with this star-tree node. - */ - public int aggregatedDocId = ALL; - - /** - * The child dimension identifier associated with this star-tree node. - */ - public int childDimensionId = ALL; - - /** - * The value of the dimension associated with this star-tree node. - */ - public long dimensionValue = ALL; - - /** - * A flag indicating whether this node is a star node (a node that represents an aggregation of all dimensions). - */ - public boolean isStarNode = false; - - /** - * A map containing the child nodes of this star-tree node, keyed by their dimension id. - */ - public Map children; - - public long getDimensionValue() { - return dimensionValue; - } -} diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java index e77f184ac0243..33088e8ccbcb3 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/builder/AbstractStarTreeBuilderTests.java @@ -36,8 +36,10 @@ import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import 
org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator; -import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.mapper.ContentPath; import org.opensearch.index.mapper.DocumentMapper; import org.opensearch.index.mapper.Mapper; @@ -1314,7 +1316,7 @@ public void test_build_starTreeDataset() throws IOException { Iterator expectedStarTreeDocumentIterator = expectedStarTreeDocuments(); Iterator resultStarTreeDocumentIterator = resultStarTreeDocuments.iterator(); Map> dimValueToDocIdMap = new HashMap<>(); - builder.rootNode.isStarNode = true; + builder.rootNode.nodeType = StarTreeNodeType.STAR.getValue(); traverseStarTree(builder.rootNode, dimValueToDocIdMap, true); Map> expectedDimToValueMap = getExpectedDimToValueMap(); @@ -2784,13 +2786,13 @@ private static StarTreeField getStarTreeFieldWithDocCount(int maxLeafDocs, boole return sf; } - private void traverseStarTree(TreeNode root, Map> dimValueToDocIdMap, boolean traverStarNodes) { - TreeNode starTree = root; + private void traverseStarTree(InMemoryTreeNode root, Map> dimValueToDocIdMap, boolean traverStarNodes) { + InMemoryTreeNode starTree = root; // Use BFS to traverse the star tree - Queue queue = new ArrayDeque<>(); + Queue queue = new ArrayDeque<>(); queue.add(starTree); int currentDimensionId = -1; - TreeNode starTreeNode; + InMemoryTreeNode starTreeNode; List docIds = new ArrayList<>(); while ((starTreeNode = queue.poll()) != null) { int dimensionId = starTreeNode.dimensionId; @@ -2801,17 +2803,17 @@ private void traverseStarTree(TreeNode root, Map> di // store aggregated document of the node int docId = starTreeNode.aggregatedDocId; Map map = dimValueToDocIdMap.getOrDefault(dimensionId, new HashMap<>()); - if (starTreeNode.isStarNode) { + if (starTreeNode.nodeType == StarTreeNodeType.STAR.getValue()) { map.put(Long.MAX_VALUE, docId); } else { 
map.put(starTreeNode.dimensionValue, docId); } dimValueToDocIdMap.put(dimensionId, map); - if (starTreeNode.children != null && (!traverStarNodes || starTreeNode.isStarNode)) { - Iterator childrenIterator = starTreeNode.children.values().iterator(); + if (starTreeNode.children != null && (!traverStarNodes || starTreeNode.nodeType == StarTreeNodeType.STAR.getValue())) { + Iterator childrenIterator = starTreeNode.children.values().iterator(); while (childrenIterator.hasNext()) { - TreeNode childNode = childrenIterator.next(); + InMemoryTreeNode childNode = childrenIterator.next(); queue.add(childNode); } } @@ -2933,43 +2935,48 @@ public void testMergeFlow() throws IOException { validateStarTree(builder.getRootNode(), 4, 1, builder.getStarTreeDocuments()); } - private void validateStarTree(TreeNode root, int totalDimensions, int maxLeafDocuments, List starTreeDocuments) { + private void validateStarTree( + InMemoryTreeNode root, + int totalDimensions, + int maxLeafDocuments, + List starTreeDocuments + ) { Queue queue = new LinkedList<>(); queue.offer(new Object[] { root, false }); while (!queue.isEmpty()) { Object[] current = queue.poll(); - TreeNode node = (TreeNode) current[0]; + InMemoryTreeNode node = (InMemoryTreeNode) current[0]; boolean currentIsStarNode = (boolean) current[1]; assertNotNull(node); // assert dimensions - if (node.dimensionId != TreeNode.ALL) { + if (node.dimensionId != StarTreeUtils.ALL) { assertTrue(node.dimensionId >= 0 && node.dimensionId < totalDimensions); } if (node.children != null && !node.children.isEmpty()) { assertEquals(node.dimensionId + 1, node.childDimensionId); assertTrue(node.childDimensionId < totalDimensions); - TreeNode starNode = null; + InMemoryTreeNode starNode = null; Object[] nonStarNodeCumulativeMetrics = getMetrics(starTreeDocuments); - for (Map.Entry entry : node.children.entrySet()) { + for (Map.Entry entry : node.children.entrySet()) { Long childDimensionValue = entry.getKey(); - TreeNode child = entry.getValue(); 
+ InMemoryTreeNode child = entry.getValue(); Object[] currMetrics = getMetrics(starTreeDocuments); - if (!child.isStarNode) { + if (child.nodeType != StarTreeNodeType.STAR.getValue()) { // Validate dimension values in documents for (int i = child.startDocId; i < child.endDocId; i++) { StarTreeDocument doc = starTreeDocuments.get(i); int j = 0; addMetrics(doc, currMetrics, j); - if (!child.isStarNode) { + if (child.nodeType != StarTreeNodeType.STAR.getValue()) { Long dimension = doc.dimensions[child.dimensionId]; assertEquals(childDimensionValue, dimension); if (dimension != null) { assertEquals(child.dimensionValue, (long) dimension); } else { // TODO : fix this ? - assertEquals(child.dimensionValue, TreeNode.ALL); + assertEquals(child.dimensionValue, StarTreeUtils.ALL); } } } diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java index 5f7a48f8f731c..d638f697e8441 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -82,10 +82,10 @@ public void test_StarTreeNode() throws IOException { StarTreeNode child = childrenIterator.next(); if (child.getStarTreeNodeType() == StarTreeNodeType.DEFAULT.getValue()) { assertStarTreeNode( - starTreeNode.getChildForDimensionValue(child.getDimensionValue(), false), + starTreeNode.getChildForDimensionValue(child.getDimensionValue()), inMemoryTreeNodeMap.get(child.getDimensionValue()) ); - assertNull(starTreeNode.getChildForDimensionValue(child.getDimensionValue(), true)); + assertNull(starTreeNode.getChildStarNode()); } queue.add(child); @@ -124,7 +124,7 @@ public void test_starTreeSearch() throws IOException { for (int i = 
0; i < maxLevels - 1; i++) { InMemoryTreeNode randomChildNode = randomFrom(inMemoryTreeNode.children.values()); - StarTreeNode randomStarTreeChildNode = starTreeNode.getChildForDimensionValue(randomChildNode.dimensionValue, false); + StarTreeNode randomStarTreeChildNode = starTreeNode.getChildForDimensionValue(randomChildNode.dimensionValue); assertNotNull(randomStarTreeChildNode); assertStarTreeNode(randomStarTreeChildNode, randomChildNode); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java index b3845e2d31e2f..74d9244e16010 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java @@ -182,10 +182,19 @@ public void test_starTreeMetadata() throws IOException { assertEquals(dimensionsOrder.get(i).getField(), starTreeMetadata.getDimensionFields().get(i)); } - for (int i = 0; i < metricAggregatorInfos.size(); i++) { - MetricEntry metricEntry = starTreeMetadata.getMetricEntries().get(i); - assertEquals(metricAggregatorInfos.get(i).getField(), metricEntry.getMetricFieldName()); - assertEquals(metricAggregatorInfos.get(i).getMetricStat(), metricEntry.getMetricStat()); + assertEquals(starTreeField.getMetrics().size(), starTreeMetadata.getMetrics().size()); + + for (int i = 0; i < starTreeField.getMetrics().size(); i++) { + + Metric expectedMetric = starTreeField.getMetrics().get(i); + Metric resultMetric = starTreeMetadata.getMetrics().get(i); + + assertEquals(expectedMetric.getField(), resultMetric.getField()); + assertEquals(expectedMetric.getMetrics().size(), resultMetric.getMetrics().size()); + + for (int j = 0; j < resultMetric.getMetrics().size(); j++) { + 
assertEquals(expectedMetric.getMetrics().get(j), resultMetric.getMetrics().get(j)); + } } assertEquals(segmentDocumentCount, starTreeMetadata.getSegmentAggregatedDocCount(), 0); assertEquals(maxLeafDocs, starTreeMetadata.getMaxLeafDocs(), 0); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java similarity index 93% rename from server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java rename to server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java index 61163a37582ad..daf2acbcf2365 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.compositeindex.datacube.startree.node; +package org.opensearch.index.compositeindex.datacube.startree.fileformats.node; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -14,6 +14,8 @@ import org.apache.lucene.store.IndexOutput; import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -139,7 +141,7 @@ public void testGetStarTreeNodeType() throws IOException { public void testGetChildForDimensionValue() throws IOException { long dimensionValue = randomIntBetween(0, node.children.size() - 2); - FixedLengthStarTreeNode childNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue(dimensionValue, false); + FixedLengthStarTreeNode childNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue(dimensionValue); assertNotNull(childNode); assertEquals(dimensionValue, childNode.getDimensionValue()); } @@ -157,19 +159,19 @@ public void testGetChildrenIterator() throws IOException { public void testGetChildForStarNode() throws IOException { // Assuming the first child is a star node in our test data - FixedLengthStarTreeNode starNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue((long) StarTreeUtils.ALL, true); + FixedLengthStarTreeNode starNode = (FixedLengthStarTreeNode) starTreeNode.getChildStarNode(); assertNotNull(starNode); assertEquals(StarTreeUtils.ALL, starNode.getDimensionValue()); } public void testGetChildForNullNode() throws IOException { - FixedLengthStarTreeNode nullNode = (FixedLengthStarTreeNode) 
starTreeNode.getChildForDimensionValue(null, false); + FixedLengthStarTreeNode nullNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue(null); assertNull(nullNode); } public void testGetChildForInvalidDimensionValue() throws IOException { long invalidDimensionValue = Long.MAX_VALUE; - assertThrows(AssertionError.class, () -> starTreeNode.getChildForDimensionValue(invalidDimensionValue, false)); + assertThrows(AssertionError.class, () -> starTreeNode.getChildForDimensionValue(invalidDimensionValue)); } public void tearDown() throws Exception { From 4e809833c647c8812a2fd0c715ef40f02c39b2de Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Wed, 28 Aug 2024 16:49:48 +0530 Subject: [PATCH 08/11] improved binary search to reduce search space in the presence of null or star node Signed-off-by: Sarthak Aggarwal --- .../node/FixedLengthStarTreeNode.java | 27 ++++++++++++++++++- .../node/FixedLengthStarTreeNodeTests.java | 15 ++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java index f1663de7087c4..89ac4af51e221 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNode.java @@ -214,7 +214,20 @@ public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOExce */ private FixedLengthStarTreeNode handleStarNode() throws IOException { FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId); - if (firstNode.getStarTreeNodeType() == StarTreeNodeType.STAR.getValue()) { + return matchStarTreeNodeTypeOrNull(firstNode, StarTreeNodeType.STAR); + } + + /** + * Checks 
if the given node matches the specified StarTreeNodeType. + * + * @param firstNode The FixedLengthStarTreeNode to check. + * @param starTreeNodeType The StarTreeNodeType to match against. + * @return The firstNode if its type matches the starTreeNodeType, null otherwise. + * @throws IOException If an I/O error occurs during the operation. + */ + private static FixedLengthStarTreeNode matchStarTreeNodeTypeOrNull(FixedLengthStarTreeNode firstNode, StarTreeNodeType starTreeNodeType) + throws IOException { + if (firstNode.getStarTreeNodeType() == starTreeNodeType.getValue()) { return firstNode; } else { return null; @@ -229,7 +242,19 @@ private FixedLengthStarTreeNode handleStarNode() throws IOException { * @throws IOException If there's an error reading from the input */ private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException { + int low = firstChildId; + + // if the first child is a star node, increment the low to reduce the search space + if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, firstChildId), StarTreeNodeType.STAR) != null) { + low++; + } + + // if the child at low is a null node, increment the low to reduce the search space + if (matchStarTreeNodeTypeOrNull(new FixedLengthStarTreeNode(in, low), StarTreeNodeType.NULL) != null) { + low++; + } + int high = getInt(LAST_CHILD_ID_OFFSET); while (low <= high) { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java index daf2acbcf2365..7855bc623849d 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java @@ -34,6 +34,7 @@ public class 
FixedLengthStarTreeNodeTests extends OpenSearchTestCase { private Directory directory; InMemoryTreeNode node; InMemoryTreeNode starChild; + InMemoryTreeNode nullChild; FixedLengthStarTreeNode starTreeNode; @Before @@ -63,6 +64,17 @@ public void setup() throws IOException { starChild.children = new HashMap<>(); node.children.put(-1L, starChild); + nullChild = new InMemoryTreeNode(); + nullChild.dimensionId = node.dimensionId + 1; + nullChild.dimensionValue = -1; + nullChild.startDocId = randomInt(); + nullChild.endDocId = randomInt(); + nullChild.childDimensionId = -1; + nullChild.aggregatedDocId = randomInt(); + nullChild.nodeType = (byte) -1; + nullChild.children = new HashMap<>(); + node.children.put(null, nullChild); + for (int i = 1; i < randomIntBetween(2, 5); i++) { InMemoryTreeNode child = new InMemoryTreeNode(); child.dimensionId = node.dimensionId + 1; @@ -140,7 +152,8 @@ public void testGetStarTreeNodeType() throws IOException { } public void testGetChildForDimensionValue() throws IOException { - long dimensionValue = randomIntBetween(0, node.children.size() - 2); + // TODO: Add a test to verify children with star node, null node and default node with default dimension value -1 + long dimensionValue = randomIntBetween(0, node.children.size() - 3); FixedLengthStarTreeNode childNode = (FixedLengthStarTreeNode) starTreeNode.getChildForDimensionValue(dimensionValue); assertNotNull(childNode); assertEquals(dimensionValue, childNode.getDimensionValue()); From ce468474765519f8c5c8f006a9947d00b4bc486e Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Wed, 28 Aug 2024 18:06:12 +0530 Subject: [PATCH 09/11] addressing comments Signed-off-by: Sarthak Aggarwal --- .../datacube/ReadDimension.java | 2 +- .../fileformats/meta/StarTreeMetaWriter.java | 22 +++++++------------ .../fileformats/meta/StarTreeMetaTests.java | 6 ++--- .../index/mapper/StarTreeMapperTests.java | 3 +++ .../opensearch/test/OpenSearchTestCase.java | 8 ------- 5 files changed, 15 insertions(+), 
26 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java index 3b6a02967f384..4264ec87d2c74 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java @@ -15,7 +15,7 @@ import java.util.Objects; /** - * Composite index merge dimension class + * Represents a dimension for reconstructing StarTreeField from file formats during searches and merges. * * @opensearch.experimental */ diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java index 112e47d50fde6..9c07bae77c836 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java @@ -57,7 +57,7 @@ public static void writeStarTreeMetadata( long initialMetaFilePointer = metaOut.getFilePointer(); - writeMetaHeader(metaOut, CompositeMappedFieldType.CompositeFieldType.STAR_TREE, starTreeField.getName()); + writeMetaHeader(metaOut); writeMeta(metaOut, metricAggregatorInfos, starTreeField, numNodes, segmentAggregatedCount, dataFilePointer, dataFileLength); logger.debug( @@ -71,26 +71,14 @@ public static void writeStarTreeMetadata( * Writes the star-tree metadata header. 
* * @param metaOut the IndexOutput to write the header - * @param compositeFieldType the composite field type of the star-tree field - * @param starTreeFieldName the name of the star-tree field * @throws IOException if an I/O error occurs while writing the header */ - private static void writeMetaHeader( - IndexOutput metaOut, - CompositeMappedFieldType.CompositeFieldType compositeFieldType, - String starTreeFieldName - ) throws IOException { + private static void writeMetaHeader(IndexOutput metaOut) throws IOException { // magic marker for sanity metaOut.writeLong(COMPOSITE_FIELD_MARKER); // version metaOut.writeVInt(VERSION_CURRENT); - - // star tree field name - metaOut.writeString(starTreeFieldName); - - // star tree field type - metaOut.writeString(compositeFieldType.getName()); } /** @@ -115,6 +103,12 @@ private static void writeMeta( long dataFileLength ) throws IOException { + // star tree field name + metaOut.writeString(starTreeField.getName()); + + // star tree field type + metaOut.writeString(CompositeMappedFieldType.CompositeFieldType.STAR_TREE.getName()); + // number of nodes metaOut.writeInt(numNodes); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java index 74d9244e16010..c312fdc4b784c 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java @@ -122,7 +122,7 @@ public void test_starTreeMetadata() throws IOException { new Metric("field4", List.of(MetricStat.SUM)), new Metric("field6", List.of(MetricStat.VALUE_COUNT)) ); - int maxLeafDocs = randomNonNegativeInt(); + int maxLeafDocs = randomInt(Integer.MAX_VALUE); StarTreeFieldConfiguration starTreeFieldConfiguration = new 
StarTreeFieldConfiguration( maxLeafDocs, Set.of("field10"), @@ -144,10 +144,10 @@ public void test_starTreeMetadata() throws IOException { dataFileLength = randomNonNegativeLong(); dataFilePointer = randomNonNegativeLong(); - segmentDocumentCount = randomNonNegativeInt(); + segmentDocumentCount = randomInt(Integer.MAX_VALUE); metaOut = directory.createOutput("star-tree-metadata", IOContext.DEFAULT); StarTreeWriter starTreeWriter = new StarTreeWriter(); - int numberOfNodes = randomNonNegativeInt(); + int numberOfNodes = randomInt(Integer.MAX_VALUE); starTreeWriter.writeStarTreeMetadata( metaOut, starTreeField, diff --git a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java index b0145dbde4bee..81454b210d6be 100644 --- a/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/StarTreeMapperTests.java @@ -333,6 +333,9 @@ public void testDimensions() { assertEquals(n1, n2); n2 = new NumericDimension("name1"); assertNotEquals(n1, n2); + } + + public void testReadDimensions() { ReadDimension r1 = new ReadDimension("name"); ReadDimension r2 = new ReadDimension("name"); assertEquals(r1, r2); diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java index 1b740476f89c3..6afc7c23d9e66 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java @@ -807,14 +807,6 @@ public static int randomInt() { return random().nextInt(); } - /** - * @return a int between 0 and Integer.MAX_VALUE (inclusive) chosen uniformly at random. - */ - public static int randomNonNegativeInt() { - int randomInt = randomInt(); - return randomInt == Integer.MIN_VALUE ? 
0 : Math.abs(randomInt); - } - /** * @return a long between 0 and Long.MAX_VALUE (inclusive) chosen uniformly at random. */ From 7dd70ee5a33e987f05b605ea8f1e5343e393ba96 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Wed, 28 Aug 2024 18:53:35 +0530 Subject: [PATCH 10/11] addressing nits Signed-off-by: Sarthak Aggarwal --- .../startree/builder/BaseStarTreeBuilder.java | 2 +- .../startree/builder/StarTreeBuilder.java | 2 +- .../fileformats/meta/StarTreeMetaWriter.java | 2 + .../datacube/startree/node/StarTree.java | 34 --------------- .../startree/node/StarTreeFactory.java | 42 +++++++++++++++++++ .../startree/node/StarTreeNodeType.java | 2 +- .../data/StarTreeFileFormatsTests.java | 8 ++-- .../node/FixedLengthStarTreeNodeTests.java | 5 +-- 8 files changed, 52 insertions(+), 45 deletions(-) delete mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java create mode 100644 server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeFactory.java diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index d4bb1cc3dc1f7..d3105b4ae23c7 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -557,7 +557,7 @@ void build(Iterator starTreeDocumentIterator) throws IOExcepti int numAggregatedStarTreeDocument = numStarTreeDocs - numStarTreeDocument - numStarTreeDocumentUnderStarNode; logger.debug("Finished creating aggregated documents : {}", numAggregatedStarTreeDocument); - // TODO: When StarTree Codec is ready + // TODO: When the star-tree codec is ready // Create doc values indices in disk // Serialize and save in disk // Write star tree
metadata for off heap implementation diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java index 94c9c9f2efb18..357f48c0cc726 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/StarTreeBuilder.java @@ -34,7 +34,7 @@ public interface StarTreeBuilder extends Closeable { void build(Map fieldProducerMap) throws IOException; /** - * Builds the star tree using StarTree values from multiple segments + * Builds the star tree using star-tree values from multiple segments * * @param starTreeValuesSubs contains the star tree values from multiple segments * @throws IOException when we are unable to build star-tree diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java index 9c07bae77c836..2515c1efc3aed 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaWriter.java @@ -58,6 +58,8 @@ public static void writeStarTreeMetadata( long initialMetaFilePointer = metaOut.getFilePointer(); writeMetaHeader(metaOut); + + // TODO: Replace the parameters with StarTreeMetadata class object writeMeta(metaOut, metricAggregatorInfos, starTreeField, numNodes, segmentAggregatedCount, dataFilePointer, dataFileLength); logger.debug( diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java
b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java deleted file mode 100644 index 828d98368b9e6..0000000000000 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTree.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ -package org.opensearch.index.compositeindex.datacube.startree.node; - -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.RandomAccessInput; -import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; -import org.opensearch.index.compositeindex.datacube.startree.fileformats.node.FixedLengthStarTreeNode; - -import java.io.IOException; - -/** - * Off heap implementation of the star-tree. - * - * @opensearch.experimental - */ -public class StarTree { - private final FixedLengthStarTreeNode root; - - public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { - RandomAccessInput in = data.randomAccessSlice(0, starTreeMetadata.getDataLength()); - root = new FixedLengthStarTreeNode(in, 0); - } - - public StarTreeNode getRoot() { - return root; - } - -} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeFactory.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeFactory.java new file mode 100644 index 0000000000000..79b5947d4f00a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeFactory.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ +package org.opensearch.index.compositeindex.datacube.startree.node; + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; +import org.opensearch.index.compositeindex.datacube.startree.fileformats.node.FixedLengthStarTreeNode; + +import java.io.IOException; + +/** + * A factory class for creating off-heap implementations of star-tree nodes. + * + *

This class provides a static factory method to create instances of {@link StarTreeNode} + * from an {@link IndexInput} and {@link StarTreeMetadata}. The implementation uses an + * off-heap data structure to store and access the star-tree data efficiently using random access. + * + * @opensearch.experimental + */ +public class StarTreeFactory { + + /** + * Creates a new instance of {@link StarTreeNode} from the provided {@link IndexInput} and + * {@link StarTreeMetadata}. + * + * @param data The {@link IndexInput} containing the star-tree data. + * @param starTreeMetadata The {@link StarTreeMetadata} containing metadata about the star-tree. + * @return A new instance of {@link StarTreeNode} representing the root of the star-tree. + * @throws IOException If an error occurs while reading the star-tree data. + */ + public static StarTreeNode createStarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOException { + RandomAccessInput in = data.randomAccessSlice(0, starTreeMetadata.getDataLength()); + return new FixedLengthStarTreeNode(in, 0); + } + +} diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java index 7eb2ccd8bcd0e..4c4725e78ff15 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/node/StarTreeNodeType.java @@ -9,7 +9,7 @@ package org.opensearch.index.compositeindex.datacube.startree.node; /** - * Represents the different types of nodes in a StarTree data structure. + * Represents the different types of nodes in a star-tree data structure. * *

* In order to handle different node types, we use a byte value to represent the node type. diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java index d638f697e8441..4653ac8b08198 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/data/StarTreeFileFormatsTests.java @@ -15,7 +15,7 @@ import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; -import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeFactory; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.test.OpenSearchTestCase; @@ -64,9 +64,8 @@ public void test_StarTreeNode() throws IOException { StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); - StarTree starTree = new StarTree(dataIn, starTreeMetadata); - StarTreeNode starTreeNode = starTree.getRoot(); + StarTreeNode starTreeNode = StarTreeFactory.createStarTree(dataIn, starTreeMetadata); Queue queue = new ArrayDeque<>(); queue.add(starTreeNode); @@ -116,9 +115,8 @@ public void test_starTreeSearch() throws IOException { StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); 
when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); - StarTree starTree = new StarTree(dataIn, starTreeMetadata); - StarTreeNode starTreeNode = starTree.getRoot(); + StarTreeNode starTreeNode = StarTreeFactory.createStarTree(dataIn, starTreeMetadata); InMemoryTreeNode inMemoryTreeNode = inMemoryTreeNodeMap.get(starTreeNode.getDimensionValue()); assertNotNull(inMemoryTreeNode); diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java index 7855bc623849d..23dabf8093e6e 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java @@ -15,7 +15,7 @@ import org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter; import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata; import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode; -import org.opensearch.index.compositeindex.datacube.startree.node.StarTree; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeFactory; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -98,9 +98,8 @@ public void setup() throws IOException { StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); - StarTree starTree = new StarTree(dataIn, starTreeMetadata); - starTreeNode = 
(FixedLengthStarTreeNode) starTree.getRoot(); + starTreeNode = (FixedLengthStarTreeNode) StarTreeFactory.createStarTree(dataIn, starTreeMetadata); } From c258c1f72125c82ff3e328d86cea8e24750aae33 Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Thu, 29 Aug 2024 08:50:24 +0530 Subject: [PATCH 11/11] added tests Signed-off-by: Sarthak Aggarwal --- ...aTests.java => StarTreeMetadataTests.java} | 2 +- .../node/FixedLengthStarTreeNodeTests.java | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) rename server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/{StarTreeMetaTests.java => StarTreeMetadataTests.java} (99%) diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java similarity index 99% rename from server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java rename to server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java index c312fdc4b784c..62bd74cc0b3fc 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetaTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/meta/StarTreeMetadataTests.java @@ -49,7 +49,7 @@ import static org.opensearch.index.compositeindex.datacube.startree.fileformats.StarTreeWriter.VERSION_CURRENT; import static org.opensearch.index.mapper.CompositeMappedFieldType.CompositeFieldType.STAR_TREE; -public class StarTreeMetaTests extends OpenSearchTestCase { +public class StarTreeMetadataTests extends OpenSearchTestCase { private IndexOutput metaOut; private IndexInput metaIn; diff --git 
a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java index 23dabf8093e6e..6f24728c24f30 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/node/FixedLengthStarTreeNodeTests.java @@ -186,6 +186,44 @@ public void testGetChildForInvalidDimensionValue() throws IOException { assertThrows(AssertionError.class, () -> starTreeNode.getChildForDimensionValue(invalidDimensionValue)); } + public void testOnlyRootNodePresent() throws IOException { + + Directory directory = newFSDirectory(createTempDir()); + + IndexOutput dataOut = directory.createOutput("star-tree-data-1", IOContext.DEFAULT); + StarTreeWriter starTreeWriter = new StarTreeWriter(); + + InMemoryTreeNode node = new InMemoryTreeNode(); + node.dimensionId = 0; + node.startDocId = randomInt(); + node.endDocId = randomInt(); + node.childDimensionId = 1; + node.aggregatedDocId = randomInt(); + node.nodeType = randomFrom((byte) 0, (byte) -1, (byte) 2); + node.children = new HashMap<>(); + + long starTreeDataLength = starTreeWriter.writeStarTree(dataOut, node, 1, "star-tree"); + + // asserting on the actual length of the star tree data file + assertEquals(starTreeDataLength, 33); + dataOut.close(); + + IndexInput dataIn = directory.openInput("star-tree-data-1", IOContext.READONCE); + StarTreeMetadata starTreeMetadata = mock(StarTreeMetadata.class); + when(starTreeMetadata.getDataLength()).thenReturn(starTreeDataLength); + when(starTreeMetadata.getDataStartFilePointer()).thenReturn(0L); + + FixedLengthStarTreeNode starTreeNode = (FixedLengthStarTreeNode) StarTreeFactory.createStarTree(dataIn, starTreeMetadata); + + 
assertEquals(starTreeNode.getNumChildren(), 0); + assertNull(starTreeNode.getChildForDimensionValue(randomLong())); + assertThrows(IllegalArgumentException.class, () -> starTreeNode.getChildrenIterator().next()); + assertThrows(UnsupportedOperationException.class, () -> starTreeNode.getChildrenIterator().remove()); + + dataIn.close(); + directory.close(); + } + public void tearDown() throws Exception { super.tearDown(); dataIn.close();