From 6eaf71222c41af38ff95e18bbabe451c13b06106 Mon Sep 17 00:00:00 2001 From: Tilmann Date: Sun, 30 Jun 2024 16:37:22 +0200 Subject: [PATCH] qt2 root resizing (#43) --- CHANGELOG.md | 5 +- src/main/java/org/tinspin/index/Stats.java | 5 +- .../org/tinspin/index/qthypercube/QNode.java | 1 + .../org/tinspin/index/qthypercube/QRNode.java | 1 + .../tinspin/index/qthypercube/QuadTreeKD.java | 2 +- .../org/tinspin/index/qthypercube2/QNode.java | 3 +- .../index/qthypercube2/QuadTreeKD2.java | 17 ++-- .../java/org/tinspin/index/qtplain/QNode.java | 1 + .../org/tinspin/index/qtplain/QRNode.java | 1 + .../org/tinspin/index/util/MathTools.java | 74 +++++++++++++++ .../tinspin/index/test/PointMultimapTest.java | 7 +- .../java/org/tinspin/util/MathToolsTest.java | 91 +++++++++++++++++++ .../org/tinspin/{ => util}/MinHeapTest.java | 3 +- .../tinspin/{ => util}/MinMaxHeapTest.java | 3 +- 14 files changed, 201 insertions(+), 13 deletions(-) create mode 100644 src/main/java/org/tinspin/index/util/MathTools.java create mode 100644 src/test/java/org/tinspin/util/MathToolsTest.java rename src/test/java/org/tinspin/{ => util}/MinHeapTest.java (99%) rename src/test/java/org/tinspin/{ => util}/MinMaxHeapTest.java (99%) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc64f50..9372f10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,10 +10,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [2.1.4 - Unreleased] -- Fixed tree corruption after remove() in QT2. [#40](https://github.com/tzaeschke/tinspin-indexes/issue/40) +- Fixed tree corruption after remove() in QT2. [#40](https://github.com/tzaeschke/tinspin-indexes/issues/40) - Fixed tree consistency (single-entry leaf after remove) - Fixed tree consistency (nValues) -> verify - Fixed bug in qt2.contains() +- Fixed QT2 inconsistency after root resizing after insert(). 
[#42](https://github.com/tzaeschke/tinspin-indexes/issues/42) + Essentially, we enforce all radii and the center of the root to be a power of two. + This should immensely reduce any problems with precision errors. ## [2.1.3] - 2023-11-19 diff --git a/src/main/java/org/tinspin/index/Stats.java b/src/main/java/org/tinspin/index/Stats.java index 5270d51..312739b 100644 --- a/src/main/java/org/tinspin/index/Stats.java +++ b/src/main/java/org/tinspin/index/Stats.java @@ -27,6 +27,7 @@ public class Stats { public int minLevel = Integer.MAX_VALUE; public int maxLevel = -1; public int maxDepth = 0; + public int maxValuesInNode = 0; public double sumLevel; public int maxNodeSize = -1; public int nLeaf; @@ -49,7 +50,9 @@ public String toString() { ";nNodes=" + nNodes + ";nLeaf=" + nLeaf + ";nInner=" + nInner + - ";minLevel=" + minLevel + + ";maxDepth=" + maxDepth + + ";maxValues=" + maxValuesInNode + + ";minLevel=" + minLevel + ";maxLevel=" + maxLevel + ";avgLevel=" + (sumLevel/nEntries) + ";maxNodeSize=" + maxNodeSize; diff --git a/src/main/java/org/tinspin/index/qthypercube/QNode.java b/src/main/java/org/tinspin/index/qthypercube/QNode.java index b2e8d39..393e3ad 100644 --- a/src/main/java/org/tinspin/index/qthypercube/QNode.java +++ b/src/main/java/org/tinspin/index/qthypercube/QNode.java @@ -303,6 +303,7 @@ void checkNode(QStats s, QNode parent, int depth) { s.nLeaf++; s.nEntries += values.size(); s.histoValues[values.size()]++; + s.maxValuesInNode = Math.max(s.maxValuesInNode, values.size()); for (int i = 0; i < values.size(); i++) { PointEntry e = values.get(i); if (!QUtil.fitsIntoNode(e.point(), center, radius*QUtil.EPS_MUL)) { diff --git a/src/main/java/org/tinspin/index/qthypercube/QRNode.java b/src/main/java/org/tinspin/index/qthypercube/QRNode.java index 9a297a6..d07cda3 100644 --- a/src/main/java/org/tinspin/index/qthypercube/QRNode.java +++ b/src/main/java/org/tinspin/index/qthypercube/QRNode.java @@ -350,6 +350,7 @@ void checkNode(QStats s, QRNode parent, int
depth) { } } if (values != null) { + s.maxValuesInNode = Math.max(s.maxValuesInNode, values.size()); for (int i = 0; i < values.size(); i++) { BoxEntry e = values.get(i); if (!QUtil.fitsIntoNode(e.min(), e.max(), center, radius*QUtil.EPS_MUL)) { diff --git a/src/main/java/org/tinspin/index/qthypercube/QuadTreeKD.java b/src/main/java/org/tinspin/index/qthypercube/QuadTreeKD.java index 575ead4..cbe1993 100644 --- a/src/main/java/org/tinspin/index/qthypercube/QuadTreeKD.java +++ b/src/main/java/org/tinspin/index/qthypercube/QuadTreeKD.java @@ -407,7 +407,7 @@ public QStats getStats() { * Statistics container class. */ public static class QStats extends Stats { - final int[] histoValues = new int[100]; + final int[] histoValues = new int[1000]; final int[] histoSubs; public QStats(int dims) { diff --git a/src/main/java/org/tinspin/index/qthypercube2/QNode.java b/src/main/java/org/tinspin/index/qthypercube2/QNode.java index 833f002..53150ea 100644 --- a/src/main/java/org/tinspin/index/qthypercube2/QNode.java +++ b/src/main/java/org/tinspin/index/qthypercube2/QNode.java @@ -433,7 +433,7 @@ void checkNode(QStats s, QNode parent, int depth) { if (parent != null) { if (!QUtil.isNodeEnclosed(center, radius, parent.center, parent.radius*QUtil.EPS_MUL)) { System.out.println("Outer: " + parent.radius + " " + Arrays.toString(parent.center)); - System.out.println("Child: " + radius + " " + Arrays.toString(center)); + System.out.println("Child(" + depth + "): " + radius + " " + Arrays.toString(center)); for (int d = 0; d < center.length; d++) { double parentMax = parent.center[d] + parent.radius; double childMax = center[d] + radius; @@ -457,6 +457,7 @@ void checkNode(QStats s, QNode parent, int depth) { s.nLeaf++; s.nEntries += nValues; s.histoValues[nValues]++; + s.maxValuesInNode = Math.max(s.maxValuesInNode, nValues); for (int i = 0; i < nValues; i++) { PointEntry e = values[i]; checkEntry(e); diff --git a/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java 
b/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java index f2080bd..01d50d4 100644 --- a/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java +++ b/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java @@ -21,6 +21,7 @@ import java.util.function.Predicate; import org.tinspin.index.*; +import org.tinspin.index.util.MathTools; import org.tinspin.index.util.StringBuilderLn; /** @@ -108,7 +109,7 @@ public void insert(double[] key, T value) { PointEntry e = new PointEntry<>(key, value); if (root == null) { // We calculate a better radius when adding a second point. - root = new QNode<>(key.clone(), INITIAL_RADIUS); + root = new QNode<>(MathTools.floorPowerOfTwoCopy(key), INITIAL_RADIUS); } if (root.getRadius() == INITIAL_RADIUS) { adjustRootSize(key); @@ -127,16 +128,20 @@ private void adjustRootSize(double[] key) { return; } if (root.getRadius() == INITIAL_RADIUS) { - double dist = PointDistance.L2.dist(key, root.getCenter()); - if (dist > 0) { - root.adjustRadius(2 * dist); + double dMax = MathTools.maxDelta(key, root.getCenter()); + for (int i = 0; i < root.getValueCount(); i++) { + dMax = Math.max(dMax, MathTools.maxDelta(root.getValues()[i].point(), root.getCenter())); + } + double radius = MathTools.ceilPowerOfTwo(dMax + QUtil.EPS_MUL); + if (radius > 0) { + root.adjustRadius(radius); } else if (root.getValueCount() >= maxNodeSize - 1) { - // we just set an arbitrary radius here + // all entries have (approximately?) the same coordinates. We just set an arbitrary radius here. root.adjustRadius(1000); } } } - + /** * Check whether a given key exists. 
* @param key the key to check diff --git a/src/main/java/org/tinspin/index/qtplain/QNode.java b/src/main/java/org/tinspin/index/qtplain/QNode.java index 6c386e1..2a801fe 100644 --- a/src/main/java/org/tinspin/index/qtplain/QNode.java +++ b/src/main/java/org/tinspin/index/qtplain/QNode.java @@ -296,6 +296,7 @@ void checkNode(QStats s, QNode parent, int depth) { } } if (values != null) { + s.maxValuesInNode = Math.max(s.maxValuesInNode, values.size()); for (int i = 0; i < values.size(); i++) { PointEntry e = values.get(i); if (!QUtil.fitsIntoNode(e.point(), center, radius*QUtil.EPS_MUL)) { diff --git a/src/main/java/org/tinspin/index/qtplain/QRNode.java b/src/main/java/org/tinspin/index/qtplain/QRNode.java index bdf942d..766b5c8 100644 --- a/src/main/java/org/tinspin/index/qtplain/QRNode.java +++ b/src/main/java/org/tinspin/index/qtplain/QRNode.java @@ -389,6 +389,7 @@ void checkNode(QStats s, QRNode parent, int depth) { } } if (values != null) { + s.maxValuesInNode = Math.max(s.maxValuesInNode, values.size()); for (int i = 0; i < values.size(); i++) { BoxEntry e = values.get(i); if (!QUtil.fitsIntoNode(e.min(), e.max(), center, radius*QUtil.EPS_MUL)) { diff --git a/src/main/java/org/tinspin/index/util/MathTools.java b/src/main/java/org/tinspin/index/util/MathTools.java new file mode 100644 index 0000000..2507478 --- /dev/null +++ b/src/main/java/org/tinspin/index/util/MathTools.java @@ -0,0 +1,74 @@ +/* + * Copyright 2016-2024 Tilmann Zaeschke + * + * This file is part of TinSpin. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.tinspin.index.util; + +public class MathTools { + + private MathTools() {} + + /** + * Similar to Math.ceil() with the ceiling being the next higher power of 2. + * The resulting number can repeatedly and (almost) always be divided by two without loss of precision. + * @param d input + * @return next power of two above or equal to 'input' + */ + public static double ceilPowerOfTwo(double d) { + double ceil = floorPowerOfTwo(d); + return ceil == d ? ceil : ceil * 2; + } + + /** + * Similar to Math.floor() with the floor being the next lower power of 2. + * The resulting number can repeatedly and (almost) always be divided by two without loss of precision. + * We calculate the "floor" by setting the "fraction" of the bit representation to 0. + * @param d input + * @return next power of two below or equal to 'input' + */ + public static double floorPowerOfTwo(double d) { + // Set fraction to "0". + return Double.longBitsToDouble(Double.doubleToRawLongBits(d) & 0xFFF0_0000_0000_0000L); + } + + /** + * Calculates the {@link #floorPowerOfTwo(double)} of an array. + * @param d input vector + * @return copied vector with next lower power of two below 'input' + * @see #floorPowerOfTwo(double) + */ + public static double[] floorPowerOfTwoCopy(double[] d) { + double[] d2 = new double[d.length]; + for (int i = 0; i < d.length; i++) { + d2[i] = floorPowerOfTwo(d[i]); + } + return d2; + } + + /** + * Returns the maximal absolute difference between corresponding components of the two vectors. + * @param v1 vector 1 + * @param v2 vector 2 + * @return maximal delta (positive or zero).
+ */ + public static double maxDelta(double[] v1, double[] v2) { + double dMax = 0; + for (int i = 0; i < v1.length; i++) { + dMax = Math.max(dMax, Math.abs(v1[i] - v2[i])); + } + return dMax; + } +} diff --git a/src/test/java/org/tinspin/index/test/PointMultimapTest.java b/src/test/java/org/tinspin/index/test/PointMultimapTest.java index e758e3e..a6a7c6e 100644 --- a/src/test/java/org/tinspin/index/test/PointMultimapTest.java +++ b/src/test/java/org/tinspin/index/test/PointMultimapTest.java @@ -141,7 +141,10 @@ private void smokeTest(List data) { for (Entry e : data) { tree.insert(e.p, e); } - // System.out.println(tree.toStringTree()); + + // Check consistency + tree.getStats(); + for (Entry e : data) { PointIterator it = tree.queryExactPoint(e.p); assertTrue("query(point) failed: " + e, it.hasNext()); @@ -167,6 +170,8 @@ private void smokeTest(List data) { assertEquals(data.size(), nExtent); } + tree.getStats(); + for (Entry e : data) { // System.out.println("query: " + Arrays.toString(e.p)); PointIterator iter = tree.query(e.p, e.p); diff --git a/src/test/java/org/tinspin/util/MathToolsTest.java b/src/test/java/org/tinspin/util/MathToolsTest.java new file mode 100644 index 0000000..c33150f --- /dev/null +++ b/src/test/java/org/tinspin/util/MathToolsTest.java @@ -0,0 +1,91 @@ +/* + * Copyright 2016-2024 Tilmann Zaeschke + * + * This file is part of TinSpin. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.tinspin.util; + +import static org.junit.Assert.*; +import org.junit.Test; +import org.tinspin.index.util.MathTools; + +import java.util.Arrays; + +public class MathToolsTest { + + @Test + public void powerOfTwoCeil() { + assertEquals(1./32., MathTools.ceilPowerOfTwo(0.03), 0.0); + assertEquals(0.5, MathTools.ceilPowerOfTwo(0.3), 0.0); + assertEquals(4, MathTools.ceilPowerOfTwo(3), 0.0); + assertEquals(32, MathTools.ceilPowerOfTwo(30), 0.0); + assertEquals(512, MathTools.ceilPowerOfTwo(300), 0.0); + + assertEquals(-0.5, MathTools.ceilPowerOfTwo(-0.3), 0.0); + assertEquals(-4, MathTools.ceilPowerOfTwo(-3), 0.0); + assertEquals(-32, MathTools.ceilPowerOfTwo(-30), 0.0); + + // identity + assertEquals(0, MathTools.ceilPowerOfTwo(0), 0.0); + assertEquals(-0.5, MathTools.ceilPowerOfTwo(-0.5), 0.0); + assertEquals(0.5, MathTools.ceilPowerOfTwo(0.5), 0.0); + assertEquals(-1, MathTools.ceilPowerOfTwo(-1), 0.0); + assertEquals(1, MathTools.ceilPowerOfTwo(1), 0.0); + assertEquals(-2, MathTools.ceilPowerOfTwo(-2), 0.0); + assertEquals(2, MathTools.ceilPowerOfTwo(2), 0.0); + } + + @Test + public void powerOfTwoFloor() { + assertEquals(1./64., MathTools.floorPowerOfTwo(0.03), 0.0); + assertEquals(0.25, MathTools.floorPowerOfTwo(0.3), 0.0); + assertEquals(2, MathTools.floorPowerOfTwo(3), 0.0); + assertEquals(16, MathTools.floorPowerOfTwo(30), 0.0); + assertEquals(256, MathTools.floorPowerOfTwo(300), 0.0); + + assertEquals(-0.25, MathTools.floorPowerOfTwo(-0.3), 0.0); + assertEquals(-2, MathTools.floorPowerOfTwo(-3), 0.0); + assertEquals(-16, MathTools.floorPowerOfTwo(-30), 0.0); + + // identity + assertEquals(0, MathTools.ceilPowerOfTwo(0), 0.0); + assertEquals(-0.5, MathTools.ceilPowerOfTwo(-0.5), 0.0); + assertEquals(0.5, MathTools.ceilPowerOfTwo(0.5), 0.0); + assertEquals(-1, MathTools.ceilPowerOfTwo(-1), 0.0); + assertEquals(1, MathTools.ceilPowerOfTwo(1), 0.0); + assertEquals(-2, MathTools.ceilPowerOfTwo(-2), 0.0); + assertEquals(2, 
MathTools.ceilPowerOfTwo(2), 0.0); + } + + @Test + public void powerOfTwoFloor_vector() { + double[] d = {0.03, 0.3, 3, 30, 300}; + double[] dCopy = MathTools.floorPowerOfTwoCopy(d); + assertFalse(Arrays.equals(d, dCopy)); + assertEquals(1./64., dCopy[0], 0.0); + assertEquals(0.25, dCopy[1], 0.0); + assertEquals(2, dCopy[2], 0.0); + assertEquals(16, dCopy[3], 0.0); + assertEquals(256, dCopy[4], 0.0); + } + + @Test + public void maxDelta() { + assertEquals(12, MathTools.maxDelta(new double[]{-4.}, new double[]{8.}), 0.0); + assertEquals(12, MathTools.maxDelta(new double[]{8.}, new double[]{-4.}), 0.0); + + assertEquals(4, MathTools.maxDelta(new double[]{2, 4, 2}, new double[]{3, 8, 4}), 0.0); + } +} diff --git a/src/test/java/org/tinspin/MinHeapTest.java b/src/test/java/org/tinspin/util/MinHeapTest.java similarity index 99% rename from src/test/java/org/tinspin/MinHeapTest.java rename to src/test/java/org/tinspin/util/MinHeapTest.java index f729c0e..8eff04a 100644 --- a/src/test/java/org/tinspin/MinHeapTest.java +++ b/src/test/java/org/tinspin/util/MinHeapTest.java @@ -14,7 +14,8 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */package org.tinspin; + */ +package org.tinspin.util; import org.junit.Test; import org.tinspin.index.util.MinHeapI; diff --git a/src/test/java/org/tinspin/MinMaxHeapTest.java b/src/test/java/org/tinspin/util/MinMaxHeapTest.java similarity index 99% rename from src/test/java/org/tinspin/MinMaxHeapTest.java rename to src/test/java/org/tinspin/util/MinMaxHeapTest.java index a8bc46d..c812671 100644 --- a/src/test/java/org/tinspin/MinMaxHeapTest.java +++ b/src/test/java/org/tinspin/util/MinMaxHeapTest.java @@ -14,7 +14,8 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- */package org.tinspin; + */ +package org.tinspin.util; import static org.junit.Assert.*; import org.junit.Test;