From 60499a543a5718e9178f28533fa6958e84c6b5e3 Mon Sep 17 00:00:00 2001 From: Matthew Kavanagh Date: Tue, 28 Jul 2020 18:53:29 +0100 Subject: [PATCH] Prototype bitmap frequency aggs --- .../apache/solr/search/ValueSourceParser.java | 8 +- .../solr/search/facet/BitmapCollectorAgg.java | 17 +- .../solr/search/facet/BitmapFrequencyAgg.java | 81 +++++ .../search/facet/BitmapFrequencyCounter.java | 286 +++++++++++++++++ .../search/facet/BitmapFrequencySlotAcc.java | 52 +++ .../apache/solr/search/facet/BitmapUtil.java | 25 ++ .../facet/FrequencyOfFrequenciesAgg.java | 86 +++++ .../facet/BitmapFrequencyCounterTest.java | 296 ++++++++++++++++++ 8 files changed, 835 insertions(+), 16 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java create mode 100644 solr/core/src/test/org/apache/solr/search/facet/BitmapFrequencyCounterTest.java diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index c1fa05f580ea..e122eced3b59 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -57,14 +57,16 @@ import org.apache.solr.search.facet.AggValueSource; import org.apache.solr.search.facet.AvgAgg; import org.apache.solr.search.facet.BitmapCollectorAgg; +import org.apache.solr.search.facet.BitmapFrequencyAgg; import org.apache.solr.search.facet.CountAgg; +import org.apache.solr.search.facet.FrequencyOfFrequenciesAgg; import org.apache.solr.search.facet.HLLAgg; import 
org.apache.solr.search.facet.MinMaxAgg; import org.apache.solr.search.facet.PercentileAgg; +import org.apache.solr.search.facet.RelatednessAgg; import org.apache.solr.search.facet.StddevAgg; import org.apache.solr.search.facet.SumAgg; import org.apache.solr.search.facet.SumsqAgg; -import org.apache.solr.search.facet.RelatednessAgg; import org.apache.solr.search.facet.TopDocsAgg; import org.apache.solr.search.facet.UniqueAgg; import org.apache.solr.search.facet.UniqueBlockAgg; @@ -1059,6 +1061,10 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError { addParser("agg_bitmapcollector", new BitmapCollectorAgg.Parser()); + addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser()); + + addParser("agg_bitmapfreqfreq", new FrequencyOfFrequenciesAgg.Parser()); + addParser("childfield", new ChildFieldValueSourceParser()); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java index f107582ee429..3259af4f7707 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java @@ -1,7 +1,5 @@ package org.apache.solr.search.facet; -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; @@ -73,7 +71,7 @@ public Object getValue(int slotNum) { byte[] serialised; if (result[slotNum] != null) { result[slotNum].runOptimize(); - serialised = bitmapToBytes(result[slotNum]); + serialised = BitmapUtil.bitmapToBytes(result[slotNum]); } else { serialised = new byte[0]; } @@ -116,20 +114,9 @@ public void finish(Context mcontext) { public Object getMergedResult() { combined.runOptimize(); SimpleOrderedMap map = new SimpleOrderedMap(); - map.add(KEY, bitmapToBytes(combined)); + map.add(KEY, BitmapUtil.bitmapToBytes(combined)); return map; } } - private static byte[] 
bitmapToBytes(MutableRoaringBitmap bitmap) { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream dos = new DataOutputStream(bos); - try { - bitmap.serialize(dos); - dos.close(); - return bos.toByteArray(); - } catch (IOException ioe) { - throw new RuntimeException("Failed to serialise RoaringBitmap to bytes", ioe); - } - } } diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java new file mode 100644 index 000000000000..cda7afb0ee80 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java @@ -0,0 +1,81 @@ +package org.apache.solr.search.facet; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; + +/** + * Calculates the frequency of ordinal values using Roaring Bitmaps. + * + * The response is a map with the following fields: + * - bitmaps: an array of bitmaps, where the frequency of a value x is given by the sum of {@code 2^i} for all values + * of {@code i} where {@code bitmaps[i].contains(x)} + * - overflow: a bitmap of ordinal values with {@code frequency >= 2^(bitmaps.length)} + * + * Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting. 
+ */ +public class BitmapFrequencyAgg extends SimpleAggValueSource { + private final int size; + + public BitmapFrequencyAgg(ValueSource vs, int size) { + super("bitmapfreq", vs); + + this.size = size; + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { + return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new Merger(size); + } + + public static class Parser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + ValueSource valueSource = fp.parseValueSource(); + + int size = 16; + if (fp.hasMoreArguments()) { + size = fp.parseInt(); + } + + return new BitmapFrequencyAgg(valueSource, size); + } + } + + private static class Merger extends FacetMerger { + private final int size; + private BitmapFrequencyCounter result; + + public Merger(int size) { + this.size = size; + this.result = new BitmapFrequencyCounter(size); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + if (facetResult instanceof SimpleOrderedMap) { + BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size); + deserialized.deserialize((SimpleOrderedMap) facetResult); + + result = result.merge(deserialized); + } + } + + @Override + public void finish(Context mcontext) { + // never called + } + + @Override + public Object getMergedResult() { + return result.serialize(); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java new file mode 100644 index 000000000000..ffd55e419897 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java @@ -0,0 +1,286 @@ +package org.apache.solr.search.facet; + +import org.apache.solr.common.util.SimpleOrderedMap; +import org.roaringbitmap.RoaringBitmap; + +/** + * 
Counts frequencies of ordinal values using Roaring Bitmaps. + */ +public class BitmapFrequencyCounter { + private final RoaringBitmap[] bitmaps; + private RoaringBitmap overflow; + + /** + * Constructs a new frequency counter. The maximum countable frequency will be given by {@code (2^size)-1}. + * + * @param size The number of bitmaps used to encode each frequency (one bitmap per binary digit) + */ + public BitmapFrequencyCounter(int size) { + this.bitmaps = new RoaringBitmap[size]; + } + + /** + * An array of bitmaps encoding frequencies of values: the frequency of a value x is given by the sum of {@code 2^i} + * for all values of {@code i} where {@code bitmaps[i].contains(x)}. + * + * @return The encoded frequencies + */ + public RoaringBitmap[] getBitmaps() { + return this.bitmaps; + } + + /** + * The overflow set of all values with {@code frequency >= 2^(bitmaps.length)}. + * + * @return The overflow set + */ + public RoaringBitmap getOverflow() { + return this.overflow; + } + + /** + * Adds one occurrence of the given value to the counter. + * + * @param value The value to add + */ + public void add(int value) { + // This is just binary addition x+1=y - we carry the value till we find an empty column + for (int i = 0; i < bitmaps.length; i++) { + RoaringBitmap bitmap = bitmaps[i]; + if (bitmap == null) { + bitmap = bitmaps[i] = new RoaringBitmap(); + } + + if (!bitmap.contains(value)) { + bitmap.add(value); + return; + } + + bitmap.remove(value); + } + + // If we reach this point, the frequency of this value is >= 2^(bitmaps.length) + + if (overflow == null) { + overflow = new RoaringBitmap(); + } + + overflow.add(value); + } + + /** + * Serializes the counter.
+ * + * @return The serialized data + */ + public SimpleOrderedMap serialize() { + SimpleOrderedMap serialized = new SimpleOrderedMap<>(); + + byte[][] serializedBitmaps = new byte[bitmaps.length][]; + + int i = 0; + while (i < bitmaps.length) { + RoaringBitmap bitmap = bitmaps[i]; + if (bitmap == null) { + break; + } + + bitmap.runOptimize(); + serializedBitmaps[i] = BitmapUtil.bitmapToBytes(bitmap); + + i++; + } + + if (i > 0) { + serialized.add("bitmaps", serializedBitmaps); + } + + if (overflow != null) { + overflow.runOptimize(); + serialized.add("overflow", BitmapUtil.bitmapToBytes(overflow)); + } + + return serialized; + } + + /** + * Populates the counter from the given serialized data. + * + * The counter must be fresh (with no values previously added), and have the same size as the counter from which the + * serialized data was generated. + * + * @param serialized The serialized data + */ + public void deserialize(SimpleOrderedMap serialized) { + byte[][] serializedBitmaps = (byte[][]) serialized.get("bitmaps"); + if (serializedBitmaps != null) { + for (int i = 0; i < bitmaps.length; i++) { + bitmaps[i] = BitmapUtil.bytesToBitmap(serializedBitmaps[i]); + } + } + + byte[] overflow = (byte[]) serialized.get("overflow"); + if (overflow != null) { + this.overflow = BitmapUtil.bytesToBitmap(overflow); + } else { + this.overflow = null; + } + } + + /** + * Merges this counter with another (in-place). + * + * The other counter must have the same size as this counter. After this operation, the returned counter will contain + * the values from both counters with their frequencies added together, and references to either of the original + * counters should be discarded (since either may now be invalid, and one will have been modified and returned). 
+ * + * @param other The counter to merge in + * @return The merged counter + */ + public BitmapFrequencyCounter merge(BitmapFrequencyCounter other) { + // The algorithm here is a ripple-carry adder in two dimensions, built from half-adders that are adapted from the + // standard (where s is the sum, and c the carried value): + // + // s = x xor y + // c = x and y + // + // to: + // + // s = x xor y + // c = y andnot s + // + // which allows in-place modification of bitmaps (x modified into s, y modified into c). + + RoaringBitmap c; + + int i = 0; + + RoaringBitmap x = bitmaps[i]; + RoaringBitmap y = other.bitmaps[i]; + if (x == null) { + return other; + } else if (y == null) { + return this; + } + + x.xor(y); // x2 = x1 xor y1 + y.andNot(x); // y2 = y1 andnot x2 + + c = y; // c1 = y2 + + i++; + + while (i < bitmaps.length) { + x = bitmaps[i]; + y = other.bitmaps[i]; + if (x == null || y == null) { + break; + } + + x.xor(y); // x2 = x1 xor y1 + y.andNot(x); // y2 = y1 andnot x2 + x.xor(c); // x3 = x2 xor c1 + + c.andNot(x); // c2 = c1 andnot x3 + c.or(y); // c3 = c2 or y2 + + i++; + } + + while (i < bitmaps.length) { + x = bitmaps[i]; + if (x == null) { + break; + } + + x.xor(c); // x2 = x1 xor c1 + c.andNot(x); // c2 = c1 andnot x2 + + i++; + } + + while (i < bitmaps.length) { + x = other.bitmaps[i]; + if (x == null) { + break; + } + + x.xor(c); // x2 = x1 xor c1 + c.andNot(x); // c2 = c1 andnot x2 + + bitmaps[i] = x; + + i++; + } + + if (i == bitmaps.length) { + if (overflow == null) { + overflow = c; + } else { + overflow.or(c); + } + } + + return this; + } + + public int[] decode() { + int endIndex = 0; + while (endIndex < bitmaps.length && bitmaps[endIndex] != null) { + endIndex++; + } + + if (endIndex == 0) { + return new int[0]; + } + + int[] result = new int[1 << endIndex]; + + endIndex--; + + if (endIndex == 0) { + result[1] = bitmaps[0].getCardinality(); + } else { + RoaringBitmap highBits = bitmaps[endIndex]; + + decodeLowest(highBits, endIndex - 1, 
result); + decode(highBits, endIndex - 1, result, 1 << endIndex); + } + + return result; + } + + private void decodeLowest( + RoaringBitmap excludedBits, + int endIndex, + int[] result + ) { + if (endIndex == 0) { + result[1] = RoaringBitmap.andNotCardinality(bitmaps[0], excludedBits); + } else { + RoaringBitmap highBits = RoaringBitmap.andNot(bitmaps[endIndex], excludedBits); + excludedBits = RoaringBitmap.or(bitmaps[endIndex], excludedBits); + + decodeLowest(excludedBits, endIndex - 1, result); + decode(highBits, endIndex - 1, result, 1 << endIndex); + } + } + + private void decode( + RoaringBitmap includedBits, + int endIndex, + int[] result, + int resultOffset + ) { + if (endIndex == 0) { + result[resultOffset] = RoaringBitmap.andNotCardinality(includedBits, bitmaps[0]); + result[resultOffset + 1] = RoaringBitmap.andCardinality(includedBits, bitmaps[0]); + } else { + RoaringBitmap highBits = RoaringBitmap.and(includedBits, bitmaps[endIndex]); + RoaringBitmap lowBits = RoaringBitmap.andNot(includedBits, highBits); + + decode(lowBits, endIndex - 1, result, resultOffset); + decode(highBits, endIndex - 1, result, resultOffset + (1 << endIndex)); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java new file mode 100644 index 000000000000..739fc7bf6aaa --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java @@ -0,0 +1,52 @@ +package org.apache.solr.search.facet; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.function.IntFunction; + +import org.apache.lucene.queries.function.ValueSource; + +public class BitmapFrequencySlotAcc extends FuncSlotAcc { + private BitmapFrequencyCounter[] result; + private final int maxFrequency; + + public BitmapFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots, int maxFrequency) { + super(values, 
fcontext, numSlots); + + this.result = new BitmapFrequencyCounter[numSlots]; + this.maxFrequency = maxFrequency; + } + + @Override + public void collect(int doc, int slot, IntFunction slotContext) throws IOException { + if (result[slot] == null) { + result[slot] = new BitmapFrequencyCounter(this.maxFrequency); + } + result[slot].add(values.intVal(doc)); + } + + @Override + public int compare(int slotA, int slotB) { + throw new UnsupportedOperationException(); + } + + @Override + public Object getValue(int slotNum) { + if (result[slotNum] != null) { + return result[slotNum].serialize(); + } else { + return Collections.emptyList(); + } + } + + @Override + public void reset() { + Arrays.fill(result, null); + } + + @Override + public void resize(Resizer resizer) { + result = resizer.resize(result, null); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java new file mode 100644 index 000000000000..ba38f64f644e --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java @@ -0,0 +1,25 @@ +package org.apache.solr.search.facet; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.roaringbitmap.ImmutableBitmapDataProvider; +import org.roaringbitmap.RoaringBitmap; + +public class BitmapUtil { + public static byte[] bitmapToBytes(ImmutableBitmapDataProvider bitmap) { + ByteBuffer buffer = ByteBuffer.allocate(bitmap.serializedSizeInBytes()); + bitmap.serialize(buffer); + return buffer.array(); + } + + public static RoaringBitmap bytesToBitmap(byte[] bytes) { + try { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.deserialize(ByteBuffer.wrap(bytes)); + return bitmap; + } catch (IOException ioe) { + throw new RuntimeException("Failed to deserialise RoaringBitmap from bytes", ioe); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java 
b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java new file mode 100644 index 000000000000..af666b4929d9 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java @@ -0,0 +1,86 @@ +package org.apache.solr.search.facet; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; + +/** + * Calculates the frequency-of-frequencies (number of values occurring x times) of ordinal values. + * + * The response is a map with the following fields: + * - frequencies: an array where {@code frequencies[i]} is the number of values with {@code frequency = i} (an empty + * array if no values were counted) + * - overflow: the number of values with {@code frequency >= frequencies.length} + * + * Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting.
+ */ +public class FrequencyOfFrequenciesAgg extends SimpleAggValueSource { + private final int size; + + public FrequencyOfFrequenciesAgg(ValueSource vs, Integer size) { + super("bitmapfreqfreq", vs); + + this.size = size; + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { + return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new Merger(size); + } + + public static class Parser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + ValueSource valueSource = fp.parseValueSource(); + + int size = 8; + if (fp.hasMoreArguments()) { + size = fp.parseInt(); + } + + return new FrequencyOfFrequenciesAgg(valueSource, size); + } + } + + private static class Merger extends FacetMerger { + private final int size; + private BitmapFrequencyCounter result; + + public Merger(int size) { + this.size = size; + this.result = new BitmapFrequencyCounter(size); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + if (facetResult instanceof SimpleOrderedMap) { + BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size); + deserialized.deserialize((SimpleOrderedMap) facetResult); + + result = result.merge(deserialized); + } + } + + @Override + public void finish(Context mcontext) { + // never called + } + + @Override + public Object getMergedResult() { + SimpleOrderedMap map = new SimpleOrderedMap<>(); + + map.add("frequencies", result.decode()); + map.add("overflow", result.getOverflow().getCardinality()); + + return map; + } + } +} diff --git a/solr/core/src/test/org/apache/solr/search/facet/BitmapFrequencyCounterTest.java b/solr/core/src/test/org/apache/solr/search/facet/BitmapFrequencyCounterTest.java new file mode 100644 index 000000000000..fb6f35e246ed --- /dev/null +++ 
b/solr/core/src/test/org/apache/solr/search/facet/BitmapFrequencyCounterTest.java @@ -0,0 +1,296 @@ +package org.apache.solr.search.facet; + +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Test; + +public class BitmapFrequencyCounterTest extends LuceneTestCase { + private static final int TEST_ORDINAL = 5; + + @Test(expected = NegativeArraySizeException.class) + public void givenNegativeSize_whenConstructingCounter() { + new BitmapFrequencyCounter(-1); + } + + @Test + public void givenSize0_whenAddingValue_withFrequency1() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(0); + + counter.add(TEST_ORDINAL); + + assertEquals(counter.getBitmaps().length, 0); + assertTrue(counter.getOverflow().contains(TEST_ORDINAL)); + } + + @Test + public void givenSize0_whenAddingValue_withFrequency2() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(0); + + counter.add(TEST_ORDINAL); + counter.add(TEST_ORDINAL); + + assertEquals(counter.getBitmaps().length, 0); + assertTrue(counter.getOverflow().contains(TEST_ORDINAL)); + } + + @Test + public void givenSize1_whenAddingValue_withFrequency1() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(1); + + counter.add(TEST_ORDINAL); + + assertEquals(counter.getBitmaps().length, 1); + assertTrue(counter.getBitmaps()[0].contains(TEST_ORDINAL)); + assertNull(counter.getOverflow()); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 2); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 1); + } + + @Test + public void givenSize1_whenAddingValue_withFrequency2() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(1); + + counter.add(TEST_ORDINAL); + counter.add(TEST_ORDINAL); + + assertEquals(counter.getBitmaps().length, 1); + assertFalse(counter.getBitmaps()[0].contains(TEST_ORDINAL)); + assertTrue(counter.getOverflow().contains(TEST_ORDINAL)); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 2); + assertEquals(decoded[0], 
0); + assertEquals(decoded[1], 0); + } + + @Test + public void givenSize2_whenAddingValue_withFrequency1() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + counter.add(TEST_ORDINAL); + + assertEquals(counter.getBitmaps().length, 2); + assertTrue(counter.getBitmaps()[0].contains(TEST_ORDINAL)); + assertNull(counter.getOverflow()); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 2); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 1); + } + + @Test + public void givenSize2_whenAddingValue_withFrequency2() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + counter.add(TEST_ORDINAL); + counter.add(TEST_ORDINAL); + + assertEquals(counter.getBitmaps().length, 2); + assertFalse(counter.getBitmaps()[0].contains(TEST_ORDINAL)); + assertTrue(counter.getBitmaps()[1].contains(TEST_ORDINAL)); + assertNull(counter.getOverflow()); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 4); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 1); + assertEquals(decoded[3], 0); + } + + @Test + public void givenSize2_whenAddingValue_withFrequency3() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + counter.add(TEST_ORDINAL); + counter.add(TEST_ORDINAL); + counter.add(TEST_ORDINAL); + + assertEquals(counter.getBitmaps().length, 2); + assertTrue(counter.getBitmaps()[0].contains(TEST_ORDINAL)); + assertTrue(counter.getBitmaps()[1].contains(TEST_ORDINAL)); + assertNull(counter.getOverflow()); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 4); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 0); + assertEquals(decoded[3], 1); + } + + @Test + public void givenSize2_whenAddingValue_withFrequency4() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + counter.add(TEST_ORDINAL); + counter.add(TEST_ORDINAL); + counter.add(TEST_ORDINAL); + counter.add(TEST_ORDINAL); 
+ + assertEquals(counter.getBitmaps().length, 2); + assertFalse(counter.getBitmaps()[0].contains(TEST_ORDINAL)); + assertFalse(counter.getBitmaps()[1].contains(TEST_ORDINAL)); + assertTrue(counter.getOverflow().contains(TEST_ORDINAL)); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 4); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 0); + assertEquals(decoded[3], 0); + } + + @Test + public void givenSize2_whenAddingMultipleValues() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + counter.add(101); + + counter.add(102); + counter.add(102); + counter.add(202); + counter.add(202); + + counter.add(103); + counter.add(103); + counter.add(103); + counter.add(203); + counter.add(203); + counter.add(203); + counter.add(303); + counter.add(303); + counter.add(303); + + assertEquals(counter.getBitmaps().length, 2); + + assertTrue(counter.getBitmaps()[0].contains(101)); + assertFalse(counter.getBitmaps()[1].contains(101)); + + assertFalse(counter.getBitmaps()[0].contains(102)); + assertTrue(counter.getBitmaps()[1].contains(102)); + assertFalse(counter.getBitmaps()[0].contains(202)); + assertTrue(counter.getBitmaps()[1].contains(202)); + + assertTrue(counter.getBitmaps()[0].contains(103)); + assertTrue(counter.getBitmaps()[1].contains(103)); + assertTrue(counter.getBitmaps()[0].contains(203)); + assertTrue(counter.getBitmaps()[1].contains(203)); + assertTrue(counter.getBitmaps()[0].contains(303)); + assertTrue(counter.getBitmaps()[1].contains(303)); + + assertNull(counter.getOverflow()); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 4); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 1); + assertEquals(decoded[2], 2); + assertEquals(decoded[3], 3); + } + + @Test + public void givenSize2_whenMergingValues() { + BitmapFrequencyCounter x = new BitmapFrequencyCounter(2); + BitmapFrequencyCounter y = new BitmapFrequencyCounter(2); + + x.add(TEST_ORDINAL); + 
x.add(TEST_ORDINAL); + + assertEquals(x.getBitmaps().length, 2); + assertFalse(x.getBitmaps()[0].contains(TEST_ORDINAL)); + assertTrue(x.getBitmaps()[1].contains(TEST_ORDINAL)); + assertNull(x.getOverflow()); + + y.add(TEST_ORDINAL); + y.add(TEST_ORDINAL); + + assertEquals(y.getBitmaps().length, 2); + assertFalse(y.getBitmaps()[0].contains(TEST_ORDINAL)); + assertTrue(y.getBitmaps()[1].contains(TEST_ORDINAL)); + assertNull(y.getOverflow()); + + x = x.merge(y); + + assertEquals(x.getBitmaps().length, 2); + assertFalse(x.getBitmaps()[0].contains(TEST_ORDINAL)); + assertFalse(x.getBitmaps()[1].contains(TEST_ORDINAL)); + assertTrue(x.getOverflow().contains(TEST_ORDINAL)); + } + + @Test + public void givenSize4_whenMergingValues() { + BitmapFrequencyCounter x = new BitmapFrequencyCounter(4); + BitmapFrequencyCounter y = new BitmapFrequencyCounter(4); + + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + x.add(TEST_ORDINAL); + + assertEquals(x.getBitmaps().length, 4); + assertFalse(x.getBitmaps()[0].contains(TEST_ORDINAL)); + assertTrue(x.getBitmaps()[1].contains(TEST_ORDINAL)); + assertFalse(x.getBitmaps()[2].contains(TEST_ORDINAL)); + assertTrue(x.getBitmaps()[3].contains(TEST_ORDINAL)); + assertNull(x.getOverflow()); + + y.add(TEST_ORDINAL); + y.add(TEST_ORDINAL); + y.add(TEST_ORDINAL); + y.add(TEST_ORDINAL); + y.add(TEST_ORDINAL); + + assertEquals(y.getBitmaps().length, 4); + assertTrue(y.getBitmaps()[0].contains(TEST_ORDINAL)); + assertFalse(y.getBitmaps()[1].contains(TEST_ORDINAL)); + assertTrue(y.getBitmaps()[2].contains(TEST_ORDINAL)); + assertNull(y.getBitmaps()[3]); + assertNull(y.getOverflow()); + + x = x.merge(y); + + assertEquals(x.getBitmaps().length, 4); + assertTrue(x.getBitmaps()[0].contains(TEST_ORDINAL)); + assertTrue(x.getBitmaps()[1].contains(TEST_ORDINAL)); + 
assertTrue(x.getBitmaps()[2].contains(TEST_ORDINAL)); + assertTrue(x.getBitmaps()[3].contains(TEST_ORDINAL)); + assertNull(y.getOverflow()); + + int[] decoded = x.decode(); + + assertEquals(decoded.length, 16); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 0); + assertEquals(decoded[3], 0); + assertEquals(decoded[4], 0); + assertEquals(decoded[5], 0); + assertEquals(decoded[6], 0); + assertEquals(decoded[7], 0); + assertEquals(decoded[8], 0); + assertEquals(decoded[9], 0); + assertEquals(decoded[10], 0); + assertEquals(decoded[11], 0); + assertEquals(decoded[12], 0); + assertEquals(decoded[13], 0); + assertEquals(decoded[14], 0); + assertEquals(decoded[15], 1); + } +}