qt2 root resizing (#43)

tzaeschke · Jun 30, 2024 · 6eaf712 · 6eaf712
1 parent 5d06b16
commit 6eaf712
Show file tree

Hide file tree

Showing 14 changed files with 201 additions and 13 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,10 +10,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [2.1.4 - Unreleased]
 
-- Fixed tree corruption after remove() in QT2. [#40](https://github.com/tzaeschke/tinspin-indexes/issue/40)
+- Fixed tree corruption after remove() in QT2. [#40](https://github.com/tzaeschke/tinspin-indexes/issues/40)
   - Fixed tree consistency (single-entry leaf after remove)
   - Fixed tree consistency (nValues) -> verify
   - Fixed bug in qt2.contains()
+- Fixed QT2 inconsistency after root resizing after insert(). [#42](https://github.com/tzaeschke/tinspin-indexes/issues/42)
+  Essentially, we enforce all radii and the center of the root to be a power of two.
+  This should immensely reduce any problems with precision errors.
 
 ## [2.1.3] - 2023-11-19
 

diff --git a/src/main/java/org/tinspin/index/Stats.java b/src/main/java/org/tinspin/index/Stats.java
@@ -27,6 +27,7 @@ public class Stats {
 	public int minLevel = Integer.MAX_VALUE;
 	public int maxLevel = -1;
 	public int maxDepth = 0;
+	public int maxValuesInNode = 0;
 	public double sumLevel;
 	public int maxNodeSize = -1;
 	public int nLeaf;
@@ -49,7 +50,9 @@ public String toString() {
 				";nNodes=" + nNodes +
 				";nLeaf=" + nLeaf + 
 				";nInner=" + nInner +
-				";minLevel=" + minLevel + 
+				";maxDepth=" + maxDepth +
+				";maxValues=" + maxValuesInNode +
+				";minLevel=" + minLevel +
 				";maxLevel=" + maxLevel + 
 				";avgLevel=" + (sumLevel/nEntries) +
 				";maxNodeSize=" + maxNodeSize;

diff --git a/src/main/java/org/tinspin/index/qthypercube/QNode.java b/src/main/java/org/tinspin/index/qthypercube/QNode.java
@@ -303,6 +303,7 @@ void checkNode(QStats s, QNode<T> parent, int depth) {
 			s.nLeaf++;
 			s.nEntries += values.size();
 			s.histoValues[values.size()]++;
+			s.maxValuesInNode = Math.max(s.maxValuesInNode, values.size());
 			for (int i = 0; i < values.size(); i++) {
 				PointEntry<T> e = values.get(i);
 				if (!QUtil.fitsIntoNode(e.point(), center, radius*QUtil.EPS_MUL)) {

diff --git a/src/main/java/org/tinspin/index/qthypercube/QRNode.java b/src/main/java/org/tinspin/index/qthypercube/QRNode.java
@@ -350,6 +350,7 @@ void checkNode(QStats s, QRNode<T> parent, int depth) {
 			}
 		}
 		if (values != null) {
+			s.maxValuesInNode = Math.max(s.maxValuesInNode, values.size());
 			for (int i = 0; i < values.size(); i++) {
 				BoxEntry<T> e = values.get(i);
 				if (!QUtil.fitsIntoNode(e.min(), e.max(), center, radius*QUtil.EPS_MUL)) {

diff --git a/src/main/java/org/tinspin/index/qthypercube/QuadTreeKD.java b/src/main/java/org/tinspin/index/qthypercube/QuadTreeKD.java
@@ -407,7 +407,7 @@ public QStats getStats() {
 	 * Statistics container class.
 	 */
 	public static class QStats extends Stats {
-		final int[] histoValues = new int[100];
+		final int[] histoValues = new int[1000];
 		final int[] histoSubs;
 
 		public QStats(int dims) {

diff --git a/src/main/java/org/tinspin/index/qthypercube2/QNode.java b/src/main/java/org/tinspin/index/qthypercube2/QNode.java
@@ -433,7 +433,7 @@ void checkNode(QStats s, QNode<T> parent, int depth) {
 		if (parent != null) {
 			if (!QUtil.isNodeEnclosed(center, radius, parent.center, parent.radius*QUtil.EPS_MUL)) {
 				System.out.println("Outer: " + parent.radius + " " + Arrays.toString(parent.center));
-				System.out.println("Child: " + radius + " " + Arrays.toString(center));
+				System.out.println("Child(" + depth + "): " + radius + " " + Arrays.toString(center));
 				for (int d = 0; d < center.length; d++) {
 					double parentMax = parent.center[d] + parent.radius;
 					double childMax = center[d] + radius;
@@ -457,6 +457,7 @@ void checkNode(QStats s, QNode<T> parent, int depth) {
 			s.nLeaf++;
 			s.nEntries += nValues;
 			s.histoValues[nValues]++;
+			s.maxValuesInNode = Math.max(s.maxValuesInNode, nValues);
 			for (int i = 0; i < nValues; i++) {
 				PointEntry<T> e = values[i];
 				checkEntry(e);

diff --git a/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java b/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java
@@ -21,6 +21,7 @@
 import java.util.function.Predicate;
 
 import org.tinspin.index.*;
+import org.tinspin.index.util.MathTools;
 import org.tinspin.index.util.StringBuilderLn;
 
 /**
@@ -108,7 +109,7 @@ public void insert(double[] key, T value) {
 		PointEntry<T> e = new PointEntry<>(key, value);
 		if (root == null) {
 			// We calculate a better radius when adding a second point.
-			root = new QNode<>(key.clone(), INITIAL_RADIUS);
+			root = new QNode<>(MathTools.floorPowerOfTwoCopy(key), INITIAL_RADIUS);
 		}
 		if (root.getRadius() == INITIAL_RADIUS) {
 			adjustRootSize(key);
@@ -127,16 +128,20 @@ private void adjustRootSize(double[] key) {
 			return;
 		}
 		if (root.getRadius() == INITIAL_RADIUS) {
-			double dist = PointDistance.L2.dist(key, root.getCenter());
-			if (dist > 0) {
-				root.adjustRadius(2 * dist);
+			double dMax = MathTools.maxDelta(key, root.getCenter());
+			for (int i = 0; i < root.getValueCount(); i++) {
+				dMax = Math.max(dMax, MathTools.maxDelta(root.getValues()[i].point(), root.getCenter()));
+			}
+			double radius = MathTools.ceilPowerOfTwo(dMax + QUtil.EPS_MUL);
+			if (radius > 0) {
+				root.adjustRadius(radius);
 			} else if (root.getValueCount() >= maxNodeSize - 1) {
-				// we just set an arbitrary radius here
+				// all entries have (approximately?) the same coordinates. We just set an arbitrary radius here.
 				root.adjustRadius(1000);
 			}
 		}
 	}
-	
+
 	/**
 	 * Check whether a given key exists.
 	 * @param key the key to check

diff --git a/src/main/java/org/tinspin/index/qtplain/QNode.java b/src/main/java/org/tinspin/index/qtplain/QNode.java
@@ -296,6 +296,7 @@ void checkNode(QStats s, QNode<T> parent, int depth) {
 			}
 		}
 		if (values != null) {
+			s.maxValuesInNode = Math.max(s.maxValuesInNode, values.size());
 			for (int i = 0; i < values.size(); i++) {
 				PointEntry<T> e = values.get(i);
 				if (!QUtil.fitsIntoNode(e.point(), center, radius*QUtil.EPS_MUL)) {

diff --git a/src/main/java/org/tinspin/index/qtplain/QRNode.java b/src/main/java/org/tinspin/index/qtplain/QRNode.java
@@ -389,6 +389,7 @@ void checkNode(QStats s, QRNode<T> parent, int depth) {
 			}
 		}
 		if (values != null) {
+			s.maxValuesInNode = Math.max(s.maxValuesInNode, values.size());
 			for (int i = 0; i < values.size(); i++) {
 				BoxEntry<T> e = values.get(i);
 				if (!QUtil.fitsIntoNode(e.min(), e.max(), center, radius*QUtil.EPS_MUL)) {

diff --git a/src/main/java/org/tinspin/index/util/MathTools.java b/src/main/java/org/tinspin/index/util/MathTools.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2016-2024 Tilmann Zaeschke
+ *
+ * This file is part of TinSpin.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.tinspin.index.util;
+
+public class MathTools {
+
+    private MathTools() {}
+
+    /**
+     * Similar to Math.ceil() with the ceiling being the next power of 2 away from zero (sign is kept: 3 -> 4, -3 -> -4).
+     * The resulting number can repeatedly and (almost) always be divided by two without loss of precision.
+     * @param d input; 0 and exact powers of two are returned unchanged
+     * @return power of two whose magnitude is above or equal to the magnitude of 'input'
+     */
+    public static double ceilPowerOfTwo(double d) {
+        double ceil = floorPowerOfTwo(d); // holds the floor first; doubled below unless 'd' equals its own floor
+        return ceil == d ? ceil : ceil * 2;
+    }
+
+    /**
+     * Similar to Math.floor() with the floor being the next power of 2 towards zero (sign is kept: 3 -> 2, -3 -> -2).
+     * The resulting number can repeatedly and (almost) always be divided by two without loss of precision.
+     * We calculate the "floor" by setting the "fraction" of the bit representation to 0; sign and exponent are kept.
+     * @param d input; NOTE(review): NaN and subnormal inputs presumably map to +/-infinity and +/-0 -- confirm
+     * @return power of two whose magnitude is below or equal to the magnitude of 'input'
+     */
+    public static double floorPowerOfTwo(double d) {
+        // Keep the top 12 bits (sign + exponent) and set the 52 fraction bits to "0".
+        return Double.longBitsToDouble(Double.doubleToRawLongBits(d) & 0xFFF0_0000_0000_0000L);
+    }
+
+    /**
+     * Calculates the {@link #floorPowerOfTwo(double)} of every element of an array.
+     * @param d input vector (not modified)
+     * @return newly allocated vector of the same length holding the floored power of two of each element
+     * @see #floorPowerOfTwo(double)
+     */
+    public static double[] floorPowerOfTwoCopy(double[] d) {
+        double[] d2 = new double[d.length];
+        for (int i = 0; i < d.length; i++) {
+            d2[i] = floorPowerOfTwo(d[i]);
+        }
+        return d2;
+    }
+
+    /**
+     * Returns the largest absolute difference between corresponding elements of two vectors.
+     * @param v1 vector 1; its length determines how many elements are compared
+     * @param v2 vector 2; must have at least as many elements as 'v1'
+     * @return maximal delta (positive or zero); zero if 'v1' is empty.
+     */
+    public static double maxDelta(double[] v1, double[] v2) {
+        double dMax = 0;
+        for (int i = 0; i < v1.length; i++) {
+            dMax = Math.max(dMax, Math.abs(v1[i] - v2[i]));
+        }
+        return dMax;
+    }
+}
diff --git a/src/test/java/org/tinspin/index/test/PointMultimapTest.java b/src/test/java/org/tinspin/index/test/PointMultimapTest.java
@@ -141,7 +141,10 @@ private void smokeTest(List<Entry> data) {
         for (Entry e : data) {
             tree.insert(e.p, e);
         }
-        // System.out.println(tree.toStringTree());
+
+        // Check consistency
+        tree.getStats();
+
         for (Entry e : data) {
             PointIterator<Entry> it = tree.queryExactPoint(e.p);
             assertTrue("query(point) failed: " + e, it.hasNext());
@@ -167,6 +170,8 @@ private void smokeTest(List<Entry> data) {
             assertEquals(data.size(), nExtent);
         }
 
+        tree.getStats();
+
         for (Entry e : data) {
             // System.out.println("query: " + Arrays.toString(e.p));
             PointIterator<Entry> iter = tree.query(e.p, e.p);

diff --git a/src/test/java/org/tinspin/util/MathToolsTest.java b/src/test/java/org/tinspin/util/MathToolsTest.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2016-2024 Tilmann Zaeschke
+ *
+ * This file is part of TinSpin.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.tinspin.util;
+
+import static org.junit.Assert.*;
+import org.junit.Test;
+import org.tinspin.index.util.MathTools;
+
+import java.util.Arrays;
+
+public class MathToolsTest {
+
+    @Test
+    public void powerOfTwoCeil() {
+        assertEquals(1./32., MathTools.ceilPowerOfTwo(0.03), 0.0);
+        assertEquals(0.5, MathTools.ceilPowerOfTwo(0.3), 0.0);
+        assertEquals(4, MathTools.ceilPowerOfTwo(3), 0.0);
+        assertEquals(32, MathTools.ceilPowerOfTwo(30), 0.0);
+        assertEquals(512, MathTools.ceilPowerOfTwo(300), 0.0);
+        // negative input: the sign is kept and the magnitude is rounded up
+        assertEquals(-0.5, MathTools.ceilPowerOfTwo(-0.3), 0.0);
+        assertEquals(-4, MathTools.ceilPowerOfTwo(-3), 0.0);
+        assertEquals(-32, MathTools.ceilPowerOfTwo(-30), 0.0);
+
+        // identity: zero and exact powers of two must be returned unchanged
+        assertEquals(0, MathTools.ceilPowerOfTwo(0), 0.0);
+        assertEquals(-0.5, MathTools.ceilPowerOfTwo(-0.5), 0.0);
+        assertEquals(0.5, MathTools.ceilPowerOfTwo(0.5), 0.0);
+        assertEquals(-1, MathTools.ceilPowerOfTwo(-1), 0.0);
+        assertEquals(1, MathTools.ceilPowerOfTwo(1), 0.0);
+        assertEquals(-2, MathTools.ceilPowerOfTwo(-2), 0.0);
+        assertEquals(2, MathTools.ceilPowerOfTwo(2), 0.0);
+    }
+
+    @Test
+    public void powerOfTwoFloor() {
+        assertEquals(1./64., MathTools.floorPowerOfTwo(0.03), 0.0);
+        assertEquals(0.25, MathTools.floorPowerOfTwo(0.3), 0.0);
+        assertEquals(2, MathTools.floorPowerOfTwo(3), 0.0);
+        assertEquals(16, MathTools.floorPowerOfTwo(30), 0.0);
+        assertEquals(256, MathTools.floorPowerOfTwo(300), 0.0);
+        // negative input: the sign is kept and the magnitude is rounded down
+        assertEquals(-0.25, MathTools.floorPowerOfTwo(-0.3), 0.0);
+        assertEquals(-2, MathTools.floorPowerOfTwo(-3), 0.0);
+        assertEquals(-16, MathTools.floorPowerOfTwo(-30), 0.0);
+
+        // identity: zero and exact powers of two must be returned unchanged (this must exercise floor, not ceil)
+        assertEquals(0, MathTools.floorPowerOfTwo(0), 0.0);
+        assertEquals(-0.5, MathTools.floorPowerOfTwo(-0.5), 0.0);
+        assertEquals(0.5, MathTools.floorPowerOfTwo(0.5), 0.0);
+        assertEquals(-1, MathTools.floorPowerOfTwo(-1), 0.0);
+        assertEquals(1, MathTools.floorPowerOfTwo(1), 0.0);
+        assertEquals(-2, MathTools.floorPowerOfTwo(-2), 0.0);
+        assertEquals(2, MathTools.floorPowerOfTwo(2), 0.0);
+    }
+
+    @Test
+    public void powerOfTwoFloor_vector() {
+        double[] d = {0.03, 0.3, 3, 30, 300};
+        double[] dCopy = MathTools.floorPowerOfTwoCopy(d);
+        assertFalse(Arrays.equals(d, dCopy)); // the result is a modified copy, not the input values
+        assertEquals(1./64., dCopy[0], 0.0);
+        assertEquals(0.25, dCopy[1], 0.0);
+        assertEquals(2, dCopy[2], 0.0);
+        assertEquals(16, dCopy[3], 0.0);
+        assertEquals(256, dCopy[4], 0.0);
+    }
+
+    @Test
+    public void maxDelta() {
+        // the delta is absolute, so the argument order must not matter
+        assertEquals(12, MathTools.maxDelta(new double[]{-4.}, new double[]{8.}), 0.0);
+        assertEquals(12, MathTools.maxDelta(new double[]{8.}, new double[]{-4.}), 0.0);
+        // multi-dimensional: the largest per-dimension delta wins (|4 - 8| = 4)
+        assertEquals(4, MathTools.maxDelta(new double[]{2, 4, 2}, new double[]{3, 8, 4}), 0.0);
+    }
+}
diff --git a/src/test/java/org/tinspin/MinHeapTest.java → ...st/java/org/tinspin/util/MinHeapTest.java b/src/test/java/org/tinspin/MinHeapTest.java → ...st/java/org/tinspin/util/MinHeapTest.java
@@ -14,7 +14,8 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- */package org.tinspin;
+ */
+package org.tinspin.util;
 
 import org.junit.Test;
 import org.tinspin.index.util.MinHeapI;

diff --git a/...test/java/org/tinspin/MinMaxHeapTest.java → ...java/org/tinspin/util/MinMaxHeapTest.java b/...test/java/org/tinspin/MinMaxHeapTest.java → ...java/org/tinspin/util/MinMaxHeapTest.java
@@ -14,7 +14,8 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- */package org.tinspin;
+ */
+package org.tinspin.util;
 
 import static org.junit.Assert.*;
 import org.junit.Test;