From 7011249e3ff1f4fe719431ab198116ce1c2033b9 Mon Sep 17 00:00:00 2001 From: Shailesh Singh Date: Thu, 14 Nov 2024 22:35:16 +0530 Subject: [PATCH] handle unsigned long in flush operations for star tree --- .../datacube/DimensionFactory.java | 14 +- .../datacube/NumericDimension.java | 14 ++ .../builder/OffHeapStarTreeBuilder.java | 9 +- .../utils/StarTreeDocumentsSorter.java | 24 +++- .../index/mapper/MapperBuilderProperties.java | 9 ++ .../index/mapper/NumberFieldMapper.java | 11 +- .../index/mapper/StarTreeMapper.java | 6 +- .../utils/StarTreeDocumentsSorterTests.java | 124 +++++++++++------- 8 files changed, 150 insertions(+), 61 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java index e834706e2fa9d..c99ec50b9236b 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java @@ -36,6 +36,7 @@ public class DimensionFactory { public static Dimension parseAndCreateDimension( String name, String type, + Boolean isUnsignedLong, Map dimensionMap, Mapper.TypeParser.ParserContext c ) { @@ -43,7 +44,7 @@ public static Dimension parseAndCreateDimension( case DateDimension.DATE: return parseAndCreateDateDimension(name, dimensionMap, c); case NumericDimension.NUMERIC: - return new NumericDimension(name); + return new NumericDimension(name, isUnsignedLong); case KEYWORD: return new KeywordDimension(name); default: @@ -53,6 +54,15 @@ public static Dimension parseAndCreateDimension( } } + public static Dimension parseAndCreateDimension( + String name, + String type, + Map dimensionMap, + Mapper.TypeParser.ParserContext c + ) { + return parseAndCreateDimension(name, type, false, dimensionMap, c); + } + public static Dimension parseAndCreateDimension( String name, Mapper.Builder builder, @@ -68,7 +78,7 @@ public static Dimension parseAndCreateDimension( case DATE: return parseAndCreateDateDimension(name, dimensionMap, c); case NUMERIC: - return new NumericDimension(name); + return new NumericDimension(name, builder.isUnsignedLong()); case KEYWORD: return new KeywordDimension(name); default: diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/NumericDimension.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/NumericDimension.java index fe9e3d17c0047..dc3e17458bf86 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/NumericDimension.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/NumericDimension.java @@ -26,10 +26,19 @@ @ExperimentalApi public class NumericDimension implements Dimension { public static final String NUMERIC = "numeric"; + public static final String IS_UNSIGNED_LONG_FIELD = "isUnsignedLong"; + private final String field; + private final Boolean isUnsignedLong; public NumericDimension(String field) { this.field = field; + isUnsignedLong = false; + } + + public NumericDimension(String field, Boolean isUnsignedLong) { + this.field = field; + this.isUnsignedLong = isUnsignedLong; } public String getField() { @@ -61,6 +70,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.field(CompositeDataCubeFieldType.NAME, field); builder.field(CompositeDataCubeFieldType.TYPE, NUMERIC); + builder.field(IS_UNSIGNED_LONG_FIELD, isUnsignedLong); builder.endObject(); return builder; } @@ -77,4 +87,8 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(field); } + + public Boolean isUnsignedLong() { + return isUnsignedLong; + } } diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java index 63659ef684744..dbd0e847e7c1d 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java @@ -17,6 +17,7 @@ import org.apache.lucene.util.LongValues; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.util.io.IOUtils; +import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.startree.StarTreeDocument; import org.opensearch.index.compositeindex.datacube.startree.StarTreeField; import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; @@ -228,6 +229,7 @@ private Iterator sortAndReduceDocuments(int[] sortedDocIds, in logger.debug("Sorted doc ids array is null"); return Collections.emptyIterator(); } + List dimensionsOrder = starTreeDocumentFileManager.starTreeField.getDimensionsOrder(); try { StarTreeDocumentsSorter.sort(sortedDocIds, -1, numDocs, index -> { try { @@ -235,7 +237,7 @@ private Iterator sortAndReduceDocuments(int[] sortedDocIds, in } catch (IOException e) { throw new UncheckedIOException(e); } - }); + }, dimensionsOrder); } catch (UncheckedIOException ex) { // Unwrap UncheckedIOException and throw as IOException if (ex.getCause() != null) { @@ -308,6 +310,7 @@ public List getStarTreeDocuments() throws IOException { @Override public Long getDimensionValue(int docId, int dimensionId) throws IOException { return starTreeDocumentFileManager.getDimensionValue(docId, dimensionId); + } /** @@ -328,13 +331,15 @@ public Iterator generateStarTreeDocumentsForStarNode(int start for (int i = 0; i < numDocs; i++) { sortedDocIds[i] = startDocId + i; } + List dimensionsOrder = starTreeDocumentFileManager.starTreeField.getDimensionsOrder(); StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, index -> { try { return starTreeDocumentFileManager.readDimensions(sortedDocIds[index]); } catch (IOException e) { throw new RuntimeException(e); } - }); + }, dimensionsOrder); + // Create an iterator for aggregated documents return new Iterator() { boolean hasNext = true; diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java index 7b1c63bc611ee..e641c0f223cb5 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorter.java @@ -9,7 +9,10 @@ package org.opensearch.index.compositeindex.datacube.startree.utils; import org.apache.lucene.util.IntroSorter; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import java.util.List; import java.util.Objects; import java.util.function.IntFunction; @@ -24,7 +27,8 @@ public static void sort( final int[] sortedDocIds, final int dimensionId, final int numDocs, - final IntFunction dimensionsReader + final IntFunction dimensionsReader, + final List dimensionsOrder ) { new IntroSorter() { private Long[] dimensions; @@ -45,19 +49,27 @@ protected void setPivot(int i) { protected int comparePivot(int j) { Long[] currentDimensions = dimensionsReader.apply(j); for (int i = dimensionId + 1; i < dimensions.length; i++) { - Long dimension = currentDimensions[i]; - if (!Objects.equals(dimensions[i], dimension)) { - if (dimensions[i] == null && dimension == null) { + Dimension dimension = dimensionsOrder.get(i); + Long dimensionValue = currentDimensions[i]; + if (!Objects.equals(dimensions[i], dimensionValue)) { + if (dimensions[i] == null && dimensionValue == null) { return 0; } - if (dimension == null) { + if (dimensionValue == null) { return -1; } if (dimensions[i] == null) { return 1; } - return Long.compare(dimensions[i], dimension); + if (dimension instanceof NumericDimension) { + NumericDimension numericDimension = (NumericDimension) dimension; + if (numericDimension.isUnsignedLong()) { + return Long.compareUnsigned(dimensions[i], dimensionValue); + } + } + return Long.compare(dimensions[i], dimensionValue); } + } return 0; } diff --git a/server/src/main/java/org/opensearch/index/mapper/MapperBuilderProperties.java b/server/src/main/java/org/opensearch/index/mapper/MapperBuilderProperties.java index ce7b8f28b0b29..445ffbb07262e 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MapperBuilderProperties.java +++ b/server/src/main/java/org/opensearch/index/mapper/MapperBuilderProperties.java @@ -29,6 +29,15 @@ default Optional getSupportedDataCubeDimensionType() { return Optional.empty(); } + /** + * Indicates whether the current dimension is unsigned long. + * + * @return true if the dimension is unsigned long, false otherwise. + */ + default Boolean isUnsignedLong() { + return false; + } + /** * Indicates whether the implementation supports data cube metrics. * diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 43e975f95757b..05fff8e3fd3d6 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -177,15 +177,14 @@ public NumberFieldMapper build(BuilderContext context) { @Override public Optional getSupportedDataCubeDimensionType() { - - // unsigned long is not supported as dimension for star tree - if (type.numericType.equals(NumericType.UNSIGNED_LONG)) { - return Optional.empty(); - } - return Optional.of(DimensionType.NUMERIC); } + @Override + public Boolean isUnsignedLong() { + return type.numericType.equals(NumericType.UNSIGNED_LONG); + } + @Override public boolean isDataCubeMetricSupported() { return true; diff --git a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java index 40f05a8b76755..ecab7bf11ce25 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/StarTreeMapper.java @@ -34,6 +34,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.opensearch.index.compositeindex.datacube.NumericDimension.IS_UNSIGNED_LONG_FIELD; + /** * A field mapper for star tree fields * @@ -239,12 +241,14 @@ private Dimension getDimension(String fieldName, Object dimensionMapping, Mapper if (this.objbuilder == null || this.objbuilder.mappersBuilders == null) { String type = (String) XContentMapValues.extractValue(CompositeDataCubeFieldType.TYPE, dimensionMap); dimensionMap.remove(CompositeDataCubeFieldType.TYPE); + Boolean isUnsignedLong = (Boolean) XContentMapValues.extractValue(IS_UNSIGNED_LONG_FIELD, dimensionMap); + dimensionMap.remove(IS_UNSIGNED_LONG_FIELD); if (type == null) { throw new MapperParsingException( String.format(Locale.ROOT, "unable to parse ordered_dimensions for star tree field [%s]", fieldName) ); } - return DimensionFactory.parseAndCreateDimension(name, type, dimensionMap, context); + return DimensionFactory.parseAndCreateDimension(name, type, isUnsignedLong, dimensionMap, context); } else { Optional dimBuilder = findMapperBuilderByName(name, this.objbuilder.mappersBuilders); if (dimBuilder.isEmpty()) { diff --git a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java index b485ea1a4fe3e..c6eb45c3ec5e0 100644 --- a/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java +++ b/server/src/test/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeDocumentsSorterTests.java @@ -9,6 +9,8 @@ package org.opensearch.index.compositeindex.datacube.startree.utils; import org.opensearch.common.Randomness; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.NumericDimension; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -24,37 +26,79 @@ * Tests for {@link StarTreeDocumentsSorter}. */ public class StarTreeDocumentsSorterTests extends OpenSearchTestCase { + private Map testData; + private List dimensionsOrder; @Before public void setUp() throws Exception { super.setUp(); testData = new HashMap<>(); - testData.put(0, new Long[] { -1L, 2L, 3L }); - testData.put(1, new Long[] { 1L, 2L, 2L }); - testData.put(2, new Long[] { -1L, -1L, 3L }); - testData.put(3, new Long[] { 1L, 2L, null }); - testData.put(4, new Long[] { 1L, null, 3L }); + + // 10 documents with 5 dimensions each + testData.put(0, new Long[] { null, 150L, 100L, 300L, null }); + testData.put(1, new Long[] { 1L, null, -9223372036854775807L, 200L, 300L }); + testData.put(2, new Long[] { 2L, -100L, -15L, 250L, null }); + testData.put(3, new Long[] { 2L, -100L, -10L, 210L, -9223372036854775807L }); + testData.put(4, new Long[] { 1L, 120L, null, null, 305L }); + testData.put(5, new Long[] { 2L, 150L, -5L, 200L, 295L }); + testData.put(6, new Long[] { 3L, 105L, null, -200L, -315L }); + testData.put(7, new Long[] { 1L, 120L, -10L, 205L, 310L }); + testData.put(8, new Long[] { null, -100L, 9223372036854775807L, 200L, -300L }); + testData.put(9, new Long[] { 2L, null, -10L, 210L, 325L }); + + dimensionsOrder = Arrays.asList( + new NumericDimension("dim1", false), // Long + new NumericDimension("dim2", true), // Unsigned Long + new NumericDimension("dim3", false), // Long + new NumericDimension("dim4", true), // Unsigned Long + new NumericDimension("dim5", false) // Long + ); } public void testSortDocumentsOffHeap_FirstDimension() { - int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int[] sortedDocIds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; int dimensionId = -1; - int numDocs = 5; + int numDocs = 10; - StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); + assertArrayEquals(new int[] { 7, 4, 1, 5, 2, 3, 9, 6, 0, 8 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_SecondDimension() { + int[] sortedDocIds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + int dimensionId = 0; + int numDocs = 10; - assertArrayEquals(new int[] { 2, 0, 1, 3, 4 }, sortedDocIds); + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); + assertArrayEquals(new int[] { 6, 7, 4, 5, 0, 2, 3, 8, 1, 9 }, sortedDocIds); } public void testSortDocumentsOffHeap_ThirdDimension() { - int[] sortedDocIds = { 0, 1, 2, 3, 4 }; + int[] sortedDocIds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; int dimensionId = 1; - int numDocs = 5; + int numDocs = 10; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); + assertArrayEquals(new int[] { 1, 2, 7, 3, 9, 5, 0, 8, 6, 4 }, sortedDocIds); + } + + public void testSortDocumentsOffHeap_FourthDimension() { + int[] sortedDocIds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + int dimensionId = 2; + int numDocs = 10; + + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); + assertArrayEquals(new int[] { 8, 5, 1, 7, 3, 9, 2, 0, 6, 4 }, sortedDocIds); + } - StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + public void testSortDocumentsOffHeap_FifthDimension() { + int[] sortedDocIds = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + int dimensionId = 3; + int numDocs = 10; - assertArrayEquals(new int[] { 1, 0, 2, 4, 3 }, sortedDocIds); + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); + assertArrayEquals(new int[] { 3, 6, 8, 5, 1, 4, 7, 9, 0, 2 }, sortedDocIds); } public void testSortDocumentsOffHeap_SingleElement() { @@ -62,8 +106,7 @@ public void testSortDocumentsOffHeap_SingleElement() { int dimensionId = -1; int numDocs = 1; - StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); - + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); assertArrayEquals(new int[] { 0 }, sortedDocIds); } @@ -72,32 +115,21 @@ public void testSortDocumentsOffHeap_EmptyArray() { int dimensionId = -1; int numDocs = 0; - StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); - + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); assertArrayEquals(new int[] {}, sortedDocIds); } - public void testSortDocumentsOffHeap_SecondDimensionId() { - int[] sortedDocIds = { 0, 1, 2, 3, 4 }; - int dimensionId = 0; - int numDocs = 5; - - StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); - - assertArrayEquals(new int[] { 2, 1, 0, 3, 4 }, sortedDocIds); - } - public void testSortDocumentsOffHeap_AllNulls() { Map testData = new HashMap<>(); - testData.put(0, new Long[] { null, null, null }); - testData.put(1, new Long[] { null, null, null }); - testData.put(2, new Long[] { null, null, null }); + testData.put(0, new Long[] { null, null, null, null, null }); + testData.put(1, new Long[] { null, null, null, null, null }); + testData.put(2, new Long[] { null, null, null, null, null }); int[] sortedDocIds = { 0, 1, 2 }; int dimensionId = -1; int numDocs = 3; - StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); // The order should remain unchanged as all elements are equal (null) assertArrayEquals(new int[] { 0, 1, 2 }, sortedDocIds); @@ -105,23 +137,21 @@ public void testSortDocumentsOffHeap_AllNulls() { public void testSortDocumentsOffHeap_Negatives() { Map testData = new HashMap<>(); - testData.put(0, new Long[] { -10L, 0L }); - testData.put(1, new Long[] { -9L, 0L }); - testData.put(2, new Long[] { -8L, 0L }); - testData.put(3, new Long[] { -7L, -0L }); - testData.put(4, new Long[] { -15L, -0L }); + testData.put(0, new Long[] { -10L, 0L, null, 0L, -5L }); + testData.put(1, new Long[] { -9L, 0L, null, 0L, -10L }); + testData.put(2, new Long[] { -9L, 0L, null, 0L, 15L }); + testData.put(3, new Long[] { -7L, 0L, null, 0L, -20L }); + testData.put(4, new Long[] { -15L, 0L, null, 0L, -25L }); int[] sortedDocIds = { 0, 1, 2, 3, 4 }; int dimensionId = -1; int numDocs = 5; - StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); - - // The order should remain unchanged as all elements are equal (null) + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); assertArrayEquals(new int[] { 4, 0, 1, 2, 3 }, sortedDocIds); } - public void testRandomSort() { + public void testTheRandomSort() { int i = 0; while (i < 10) { testRandomizedSort(); @@ -157,8 +187,14 @@ private void testRandomizedSort() { // for example to start from dimension in 0th index, we need to pass -1 to sort method int dimensionId = random.nextInt(numDimensions) - 1; + List dimensionsOrder = new ArrayList<>(); + for (int i = 0; i < numDimensions; i++) { + Boolean isUnsignedLong = random.nextBoolean(); + dimensionsOrder.add(new NumericDimension("fieldName", isUnsignedLong)); + } + // Sort using StarTreeDocumentsSorter - StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i])); + StarTreeDocumentsSorter.sort(sortedDocIds, dimensionId, numDocs, i -> testData.get(sortedDocIds[i]), dimensionsOrder); // Verify the sorting for (int i = 1; i < numDocs; i++) { @@ -166,7 +202,7 @@ private void testRandomizedSort() { Long[] curr = testData.get(sortedDocIds[i]); boolean isCorrectOrder = true; for (int j = dimensionId + 1; j < numDimensions; j++) { - int comparison = compareLongs(prev[j], curr[j]); + int comparison = compareLongs(prev[j], curr[j], ((NumericDimension) dimensionsOrder.get(j)).isUnsignedLong()); if (comparison < 0) { break; } else if (comparison > 0) { @@ -186,14 +222,14 @@ private void testRandomizedSort() { } } - private int compareLongs(Long a, Long b) { + private int compareLongs(Long a, Long b, Boolean isUnsignedLong) { if (!Objects.equals(a, b)) { if (a == null) { return 1; } else if (b == null) { return -1; } else { - return a.compareTo(b); + return isUnsignedLong ? Long.compareUnsigned(a, b) : Long.compare(a, b); } } return 0;