// ---- solr/core/ivy.xml ----
// This change adds two lines to ivy.xml; their content is not visible in this view.

// ---- solr/core/src/java/org/apache/solr/search/ValueSourceParser.java: imports ----
// BitmapFrequencyAgg64 and FrequencyOfFrequenciesAgg64 are imported so that the 64-bit function
// names registered below can be bound to their own parsers.
import org.apache.solr.schema.TextField;
import org.apache.solr.search.facet.AggValueSource;
import org.apache.solr.search.facet.AvgAgg;
import org.apache.solr.search.facet.BitmapCollectorAgg;
import org.apache.solr.search.facet.BitmapFrequencyAgg;
import org.apache.solr.search.facet.BitmapFrequencyAgg64;
import org.apache.solr.search.facet.CountAgg;
import org.apache.solr.search.facet.FrequencyOfFrequenciesAgg;
import org.apache.solr.search.facet.FrequencyOfFrequenciesAgg64;
import org.apache.solr.search.facet.HLLAgg;
import org.apache.solr.search.facet.MinMaxAgg;
import org.apache.solr.search.facet.PercentileAgg;
import org.apache.solr.search.facet.RelatednessAgg;
import org.apache.solr.search.facet.StddevAgg;
import org.apache.solr.search.facet.SumAgg;
import org.apache.solr.search.facet.SumsqAgg;
import org.apache.solr.search.facet.TermFrequencyOfFrequenciesAgg;
import org.apache.solr.search.facet.TopDocsAgg;
import org.apache.solr.search.facet.UniqueAgg;
import org.apache.solr.search.facet.UniqueBlockAgg;

// ---- ValueSourceParser.java: aggregation parsers registered after "agg_topdocs" ----
    addParser("agg_topdocs", new TopDocsAgg.Parser());

    addParser("agg_bitmapcollector", new BitmapCollectorAgg.Parser());

    addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser());

    // The *64 function names must be bound to the 64-bit implementations. Binding them to the
    // 32-bit parsers (as before) made them exact aliases of the 32-bit functions and left the
    // 64-bit classes unreachable.
    addParser("agg_bitmapfreq64", new BitmapFrequencyAgg64.Parser());

    addParser("agg_bitmapfreqfreq", new FrequencyOfFrequenciesAgg.Parser());

    addParser("agg_bitmapfreqfreq64", new FrequencyOfFrequenciesAgg64.Parser());

addParser("agg_termfreqfreq", new TermFrequencyOfFrequenciesAgg.Parser()); + addParser("childfield", new ChildFieldValueSourceParser()); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java new file mode 100644 index 000000000000..3259af4f7707 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java @@ -0,0 +1,122 @@ +package org.apache.solr.search.facet; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.function.IntFunction; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + + +public class BitmapCollectorAgg extends SimpleAggValueSource { + + private static final String KEY = "bitmap"; + + public static class Parser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new BitmapCollectorAgg(fp.parseValueSource()); + } + } + + public BitmapCollectorAgg(ValueSource vs) { + super("bitmapcollector", vs); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { + return new Acc(getArg(), fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new Merger(); + } + + @Override + public String description() { + return "bitmapcollector"; + } + + + private class Acc extends FuncSlotAcc { + MutableRoaringBitmap[] result; + + Acc(ValueSource vs, FacetContext fcontext, int numSlots) { + super(vs, fcontext, numSlots); + this.result = new MutableRoaringBitmap[numSlots]; + } + + @Override + public void collect(int doc, 
int slot, IntFunction slotContext) throws IOException { + if (result[slot] == null) { + result[slot] = new MutableRoaringBitmap(); + } + result[slot].add(values.intVal(doc)); + } + + @Override + public int compare(int slotA, int slotB) { + return slotA - slotB; + } + + @Override + public Object getValue(int slotNum) { + byte[] serialised; + if (result[slotNum] != null) { + result[slotNum].runOptimize(); + serialised = BitmapUtil.bitmapToBytes(result[slotNum]); + } else { + serialised = new byte[0]; + } + SimpleOrderedMap map = new SimpleOrderedMap(); + map.add(KEY, serialised); + return map; + } + + @Override + public void reset() { + Arrays.fill(result, null); + } + + @Override + public void resize(Resizer resizer) { + result = resizer.resize(result, null); + } + } + + public class Merger extends FacetMerger { + + private MutableRoaringBitmap combined = new MutableRoaringBitmap(); + + @Override + public void merge(Object facetResult, Context mcontext) { + if (facetResult instanceof SimpleOrderedMap) { + byte[] bitmapBytes = (byte[])((SimpleOrderedMap)facetResult).get(KEY); + if (bitmapBytes.length != 0) { + combined.or(new ImmutableRoaringBitmap(ByteBuffer.wrap(bitmapBytes))); + } + } + } + + @Override + public void finish(Context mcontext) { + // never called + } + + @Override + public Object getMergedResult() { + combined.runOptimize(); + SimpleOrderedMap map = new SimpleOrderedMap(); + map.add(KEY, BitmapUtil.bitmapToBytes(combined)); + return map; + } + } + +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java new file mode 100644 index 000000000000..2ecbaf19a914 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java @@ -0,0 +1,82 @@ +package org.apache.solr.search.facet; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; +import 
org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; + +/** + * Calculates the frequency of ordinal values using Roaring Bitmaps. + * + * The response is a map with the following fields: + * - bitmaps: an array of bitmaps, where the frequency of a value x is given by the sum of {@code 2^i} for all values + * of {@code i} where {@code bitmaps[i].contains(x)} + * - overflow: a map of ordinal values to frequencies, for values with {@code frequency >= 2^(bitmaps.length)} + * + * Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting. + */ +public class BitmapFrequencyAgg extends SimpleAggValueSource { + private final int size; + + public BitmapFrequencyAgg(ValueSource vs, int size) { + super("bitmapfreq", vs); + + this.size = size; + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { + return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new Merger(size); + } + + public static class Parser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + ValueSource valueSource = fp.parseValueSource(); + + int size = 16; + if (fp.hasMoreArguments()) { + size = fp.parseInt(); + } + + return new BitmapFrequencyAgg(valueSource, size); + } + } + + private static class Merger extends FacetMerger { + private final int size; + private BitmapFrequencyCounter result; + + public Merger(int size) { + this.size = size; + this.result = new BitmapFrequencyCounter(size); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + if (facetResult instanceof SimpleOrderedMap) { + BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size); + deserialized.deserialize((SimpleOrderedMap) facetResult); + + result = 
result.merge(deserialized); + } + } + + @Override + public void finish(Context mcontext) { + // never called + } + + @Override + public Object getMergedResult() { + result.normalize(); + return result.serialize(); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg64.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg64.java new file mode 100644 index 000000000000..3c36742643e2 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg64.java @@ -0,0 +1,82 @@ +package org.apache.solr.search.facet; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; + +/** + * Calculates the frequency of ordinal values using Roaring Bitmaps. + * + * The response is a map with the following fields: + * - bitmaps: an array of bitmaps, where the frequency of a value x is given by the sum of {@code 2^i} for all values + * of {@code i} where {@code bitmaps[i].contains(x)} + * - overflow: a map of ordinal values to frequencies, for values with {@code frequency >= 2^(bitmaps.length)} + * + * Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting. 
+ */ +public class BitmapFrequencyAgg64 extends SimpleAggValueSource { + private final int size; + + public BitmapFrequencyAgg64(ValueSource vs, int size) { + super("bitmapfreq64", vs); + + this.size = size; + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { + return new BitmapFrequencySlotAcc64(getArg(), fcontext, numSlots, size); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new Merger(size); + } + + public static class Parser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + ValueSource valueSource = fp.parseValueSource(); + + int size = 16; + if (fp.hasMoreArguments()) { + size = fp.parseInt(); + } + + return new BitmapFrequencyAgg64(valueSource, size); + } + } + + private static class Merger extends FacetMerger { + private final int size; + private BitmapFrequencyCounter64 result; + + public Merger(int size) { + this.size = size; + this.result = new BitmapFrequencyCounter64(size); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + if (facetResult instanceof SimpleOrderedMap) { + BitmapFrequencyCounter64 deserialized = new BitmapFrequencyCounter64(size); + deserialized.deserialize((SimpleOrderedMap) facetResult); + + result = result.merge(deserialized); + } + } + + @Override + public void finish(Context mcontext) { + // never called + } + + @Override + public Object getMergedResult() { + result.normalize(); + return result.serialize(); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java new file mode 100644 index 000000000000..b46e234367a1 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java @@ -0,0 +1,315 @@ +package org.apache.solr.search.facet; + +import java.util.ArrayList; +import java.util.HashMap; +import 
java.util.List; +import java.util.Map; + +import org.apache.solr.common.util.SimpleOrderedMap; +import org.roaringbitmap.RoaringBatchIterator; +import org.roaringbitmap.RoaringBitmap; + +/** + * Counts frequencies of ordinal values using Roaring Bitmaps. + */ +public class BitmapFrequencyCounter { + private final RoaringBitmap[] bitmaps; + private final Map overflow; + + /** + * Constructs a new frequency counter. Frequencies greater than {@code (2^size)-1} will be represented as a HashMap + * (rather than a compact bitmap encoding), and for efficiency should not represent a large fraction of the distinct + * values to be counted. + * + * @param size The maximum size of the frequencies list + */ + public BitmapFrequencyCounter(int size) { + this.bitmaps = new RoaringBitmap[size]; + this.overflow = new HashMap<>(); + } + + /** + * An array of bitmaps encoding frequencies of values: the frequency of a value x is given by the sum of {@code 2^i} + * for all values of {@code i} where {@code bitmaps[i].contains(x)}. + * + * @return The encoded frequencies + */ + public RoaringBitmap[] getBitmaps() { + return this.bitmaps; + } + + /** + * A map of high-frequency values (with {@code frequency >= 2^(bitmaps.length)}). + * + * @return The map of high-frequency values. + */ + public Map getOverflow() { + return this.overflow; + } + + /** + * Adds one occurrence of the given value to the counter. 
+ * + * @param value The value to add + */ + public void add(int value) { + final Integer overflowCount = overflow.computeIfPresent(value, (v, f) -> f + 1); + if (overflowCount != null) { + return; + } + + // This is just binary addition x+1=y - we carry the value till we find an empty column + for (int i = 0; i < bitmaps.length; i++) { + RoaringBitmap bitmap = bitmaps[i]; + if (bitmap == null) { + bitmap = bitmaps[i] = new RoaringBitmap(); + } + + if (!bitmap.checkedRemove(value)) { + bitmap.add(value); + return; + } + } + + // If we reach this point, the frequency of this value has reached 2^(bitmaps.length) + + overflow.put(value, 1 << bitmaps.length); + } + + /** + * Serializes the counter. + * + * @return The serialized data + */ + public SimpleOrderedMap serialize() { + SimpleOrderedMap serialized = new SimpleOrderedMap<>(); + + List serializedBitmaps = new ArrayList<>(bitmaps.length); + + int i = 0; + while (i < bitmaps.length) { + RoaringBitmap bitmap = bitmaps[i]; + if (bitmap == null) { + break; + } + + bitmap.runOptimize(); + serializedBitmaps.add(BitmapUtil.bitmapToBytes(bitmap)); + + i++; + } + + if (i > 0) { + serialized.add("bitmaps", serializedBitmaps); + } + + if (!overflow.isEmpty()) { + serialized.add("overflow", overflow); + } + + return serialized; + } + + /** + * Populates the counter from the given serialized data. + * + * The counter must be fresh (with no values previously added), and have the same size as the counter from which the + * serialized data was generated. 
+ * + * @param serialized The serialized data + */ + public void deserialize(SimpleOrderedMap serialized) { + List serializedBitmaps = (List) serialized.get("bitmaps"); + if (serializedBitmaps != null) { + for (int i = 0; i < serializedBitmaps.size(); i++) { + bitmaps[i] = BitmapUtil.bytesToBitmap(serializedBitmaps.get(i)); + } + } + + Map overflow = (Map) serialized.get("overflow"); + if (overflow != null) { + this.overflow.putAll(overflow); + } + } + + /** + * Merges this counter with another (in-place). + * + * The other counter must have the same size as this counter. After this operation, the returned counter will contain + * the values from both counters with their frequencies added together, and references to either of the original + * counters should be discarded (since either may now be invalid, and one will have been modified and returned). + * + * @param other The counter to merge in + * @return The merged counter + */ + public BitmapFrequencyCounter merge(BitmapFrequencyCounter other) { + // The algorithm here is a ripple-carry adder in two dimensions, built from half-adders that are adapted from the + // standard (where s is the sum, and c the carried value): + // + // s = x xor y + // c = x and y + // + // to: + // + // s = x xor y + // c = y andnot s + // + // which allows in-place modification of bitmaps (x modified into s, y modified into c). 
+ + if (bitmaps.length == 0) { + other.overflow.forEach((value, freq) -> overflow.merge(value, freq, Integer::sum)); + + return this; + } + + RoaringBitmap c; + + int i = 0; + + RoaringBitmap x = bitmaps[i]; + RoaringBitmap y = other.bitmaps[i]; + if (x == null) { + return other; + } else if (y == null) { + return this; + } + + x.xor(y); // x2 = x1 xor y1 + y.andNot(x); // y2 = y1 andnot x2 + + c = y; // c1 = y2 + + i++; + + while (i < bitmaps.length) { + x = bitmaps[i]; + y = other.bitmaps[i]; + if (x == null || y == null) { + break; + } + + x.xor(y); // x2 = x1 xor y1 + y.andNot(x); // y2 = y1 andnot x2 + x.xor(c); // x3 = x2 xor c1 + + c.andNot(x); // c2 = c1 andnot x3 + c.or(y); // c3 = c2 or y2 + + i++; + } + + while (i < bitmaps.length) { + x = bitmaps[i]; + if (x == null) { + break; + } + + x.xor(c); // x2 = x1 xor c1 + c.andNot(x); // c2 = c1 andnot x2 + + i++; + } + + while (i < bitmaps.length) { + x = other.bitmaps[i]; + if (x == null) { + break; + } + + x.xor(c); // x2 = x1 xor c1 + c.andNot(x); // c2 = c1 andnot x2 + + bitmaps[i] = x; + + i++; + } + + if (i == bitmaps.length) { + other.overflow.forEach((value, freq) -> overflow.merge(value, freq, Integer::sum)); + + RoaringBatchIterator iter = c.getBatchIterator(); + int[] batch = new int[128]; + while (iter.hasNext()) { + int batchSize = iter.nextBatch(batch); + for (int j = 0; j < batchSize; j++) { + int value = batch[j]; + int freq = 1 << bitmaps.length; + overflow.merge(value, freq, Integer::sum); + } + } + } + + return this; + } + + public void normalize() { + overflow.replaceAll((value, freq) -> { + for (int k = 0; k < bitmaps.length; k++) { + if (bitmaps[k].checkedRemove(value)) { + freq += 1 << k; + } + } + return freq; + }); + } + + public int[] decode() { + int endIndex = 0; + while (endIndex < bitmaps.length && bitmaps[endIndex] != null) { + endIndex++; + } + + if (endIndex == 0) { + return new int[0]; + } + + int[] result = new int[1 << endIndex]; + + endIndex--; + + if (endIndex == 0) { + 
result[1] = bitmaps[0].getCardinality(); + } else { + RoaringBitmap highBits = bitmaps[endIndex]; + + decodeLowest(highBits, endIndex - 1, result); + decode(highBits, endIndex - 1, result, 1 << endIndex); + } + + return result; + } + + private void decodeLowest( + RoaringBitmap excludedBits, + int endIndex, + int[] result + ) { + if (endIndex == 0) { + result[1] = RoaringBitmap.andNotCardinality(bitmaps[0], excludedBits); + } else { + RoaringBitmap highBits = RoaringBitmap.andNot(bitmaps[endIndex], excludedBits); + excludedBits = RoaringBitmap.or(bitmaps[endIndex], excludedBits); + + decodeLowest(excludedBits, endIndex - 1, result); + decode(highBits, endIndex - 1, result, 1 << endIndex); + } + } + + private void decode( + RoaringBitmap includedBits, + int endIndex, + int[] result, + int resultOffset + ) { + if (endIndex == 0) { + result[resultOffset] = RoaringBitmap.andNotCardinality(includedBits, bitmaps[0]); + result[resultOffset + 1] = RoaringBitmap.andCardinality(includedBits, bitmaps[0]); + } else { + RoaringBitmap highBits = RoaringBitmap.and(includedBits, bitmaps[endIndex]); + RoaringBitmap lowBits = RoaringBitmap.andNot(includedBits, highBits); + + decode(lowBits, endIndex - 1, result, resultOffset); + decode(highBits, endIndex - 1, result, resultOffset + (1 << endIndex)); + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter64.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter64.java new file mode 100644 index 000000000000..2fbc3b2c3129 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter64.java @@ -0,0 +1,337 @@ +package org.apache.solr.search.facet; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.solr.common.util.SimpleOrderedMap; +import org.roaringbitmap.longlong.LongIterator; +import org.roaringbitmap.longlong.Roaring64NavigableMap; + +/** + * Counts frequencies of 
ordinal values using Roaring Bitmaps. + */ +public class BitmapFrequencyCounter64 { + private final Roaring64NavigableMap[] bitmaps; + private final Map overflow; + + /** + * Constructs a new frequency counter. Frequencies greater than {@code (2^size)-1} will be represented as a HashMap + * (rather than a compact bitmap encoding), and for efficiency should not represent a large fraction of the distinct + * values to be counted. + * + * @param size The maximum size of the frequencies list + */ + public BitmapFrequencyCounter64(int size) { + this.bitmaps = new Roaring64NavigableMap[size]; + this.overflow = new HashMap<>(); + } + + /** + * An array of bitmaps encoding frequencies of values: the frequency of a value x is given by the sum of {@code 2^i} + * for all values of {@code i} where {@code bitmaps[i].contains(x)}. + * + * @return The encoded frequencies + */ + public Roaring64NavigableMap[] getBitmaps() { + return this.bitmaps; + } + + /** + * A map of high-frequency values (with {@code frequency >= 2^(bitmaps.length)}). + * + * @return The map of high-frequency values. + */ + public Map getOverflow() { + return this.overflow; + } + + /** + * Adds one occurrence of the given value to the counter. + * + * @param value The value to add + */ + public void add(long value) { + final Integer overflowCount = overflow.computeIfPresent(value, (v, f) -> f + 1); + if (overflowCount != null) { + return; + } + + // This is just binary addition x+1=y - we carry the value till we find an empty column + for (int i = 0; i < bitmaps.length; i++) { + Roaring64NavigableMap bitmap = bitmaps[i]; + if (bitmap == null) { + bitmap = bitmaps[i] = new Roaring64NavigableMap(); + } + + if (!bitmap.contains(value)) { + bitmap.add(value); + return; + } + } + + // If we reach this point, the frequency of this value is >= 2^(bitmaps.length) + + overflow.put(value, 1 << bitmaps.length); + } + + /** + * Serializes the counter. 
+ * + * @return The serialized data + */ + public SimpleOrderedMap serialize() { + SimpleOrderedMap serialized = new SimpleOrderedMap<>(); + + List serializedBitmaps = new ArrayList<>(bitmaps.length); + + int i = 0; + while (i < bitmaps.length) { + Roaring64NavigableMap bitmap = bitmaps[i]; + if (bitmap == null) { + break; + } + + bitmap.runOptimize(); + serializedBitmaps.add(BitmapUtil.bitmapToBytes64(bitmap)); + + i++; + } + + if (i > 0) { + serialized.add("bitmaps", serializedBitmaps); + } + + if (!overflow.isEmpty()) { + serialized.add("overflow", overflow); + } + + return serialized; + } + + /** + * Populates the counter from the given serialized data. + * + * The counter must be fresh (with no values previously added), and have the same size as the counter from which the + * serialized data was generated. + * + * @param serialized The serialized data + */ + public void deserialize(SimpleOrderedMap serialized) { + List serializedBitmaps = (List) serialized.get("bitmaps"); + if (serializedBitmaps != null) { + for (int i = 0; i < serializedBitmaps.size(); i++) { + bitmaps[i] = BitmapUtil.bytesToBitmap64(serializedBitmaps.get(i)); + } + } + + Map overflow = (Map) serialized.get("overflow"); + if (overflow != null) { + this.overflow.putAll(overflow); + } + } + + /** + * Merges this counter with another (in-place). + * + * The other counter must have the same size as this counter. After this operation, the returned counter will contain + * the values from both counters with their frequencies added together, and references to either of the original + * counters should be discarded (since either may now be invalid, and one will have been modified and returned). 
+ * + * @param other The counter to merge in + * @return The merged counter + */ + public BitmapFrequencyCounter64 merge(BitmapFrequencyCounter64 other) { + // The algorithm here is a ripple-carry adder in two dimensions, built from half-adders that are adapted from the + // standard (where s is the sum, and c the carried value): + // + // s = x xor y + // c = x and y + // + // to: + // + // s = x xor y + // c = y andnot s + // + // which allows in-place modification of bitmaps (x modified into s, y modified into c). + + Roaring64NavigableMap c; + + int i = 0; + + Roaring64NavigableMap x = bitmaps[i]; + Roaring64NavigableMap y = other.bitmaps[i]; + if (x == null) { + return other; + } else if (y == null) { + return this; + } + + x.xor(y); // x2 = x1 xor y1 + y.andNot(x); // y2 = y1 andnot x2 + + c = y; // c1 = y2 + + i++; + + while (i < bitmaps.length) { + x = bitmaps[i]; + y = other.bitmaps[i]; + if (x == null || y == null) { + break; + } + + x.xor(y); // x2 = x1 xor y1 + y.andNot(x); // y2 = y1 andnot x2 + x.xor(c); // x3 = x2 xor c1 + + c.andNot(x); // c2 = c1 andnot x3 + c.or(y); // c3 = c2 or y2 + + i++; + } + + while (i < bitmaps.length) { + x = bitmaps[i]; + if (x == null) { + break; + } + + x.xor(c); // x2 = x1 xor c1 + c.andNot(x); // c2 = c1 andnot x2 + + i++; + } + + while (i < bitmaps.length) { + x = other.bitmaps[i]; + if (x == null) { + break; + } + + x.xor(c); // x2 = x1 xor c1 + c.andNot(x); // c2 = c1 andnot x2 + + bitmaps[i] = x; + + i++; + } + + if (i == bitmaps.length) { + other.overflow.forEach((value, freq) -> { + overflow.merge(value, freq, Integer::sum); + }); + + LongIterator iter = c.getLongIterator(); + while (iter.hasNext()) { + long value = iter.next(); + int freq = 1 << bitmaps.length; + overflow.merge(value, freq, Integer::sum); + } + } + + return this; + } + + public void normalize() { + overflow.replaceAll((value, freq) -> { + for (int k = 0; k < bitmaps.length; k++) { + if (bitmaps[k].contains(value)) { + 
bitmaps[k].removeLong(value); + freq += 1 << k; + } + } + return freq; + }); + } + + public int[] decode() { + int endIndex = 0; + while (endIndex < bitmaps.length && bitmaps[endIndex] != null) { + endIndex++; + } + + if (endIndex == 0) { + return new int[0]; + } + + int[] result = new int[1 << endIndex]; + + endIndex--; + + if (endIndex == 0) { + result[1] = bitmaps[0].getIntCardinality(); + } else { + Roaring64NavigableMap highBits = bitmaps[endIndex]; + + decodeLowest(highBits, endIndex - 1, result); + decode(highBits, endIndex - 1, result, 1 << endIndex); + } + + return result; + } + + private void decodeLowest( + Roaring64NavigableMap excludedBits, + int endIndex, + int[] result + ) { + if (endIndex == 0) { + result[1] = andNotCardinality(bitmaps[0], excludedBits); + } else { + Roaring64NavigableMap highBits = andNot(bitmaps[endIndex], excludedBits); + excludedBits = or(bitmaps[endIndex], excludedBits); + + decodeLowest(excludedBits, endIndex - 1, result); + decode(highBits, endIndex - 1, result, 1 << endIndex); + } + } + + private void decode( + Roaring64NavigableMap includedBits, + int endIndex, + int[] result, + int resultOffset + ) { + if (endIndex == 0) { + result[resultOffset] = andNotCardinality(includedBits, bitmaps[0]); + result[resultOffset + 1] = andCardinality(includedBits, bitmaps[0]); + } else { + Roaring64NavigableMap highBits = and(includedBits, bitmaps[endIndex]); + Roaring64NavigableMap lowBits = andNot(includedBits, highBits); + + decode(lowBits, endIndex - 1, result, resultOffset); + decode(highBits, endIndex - 1, result, resultOffset + (1 << endIndex)); + } + } + + private static Roaring64NavigableMap or(Roaring64NavigableMap x1, Roaring64NavigableMap x2) { + Roaring64NavigableMap z = new Roaring64NavigableMap(); + z.or(x1); + z.or(x2); + return z; + } + + private static Roaring64NavigableMap and(Roaring64NavigableMap x1, Roaring64NavigableMap x2) { + Roaring64NavigableMap z = new Roaring64NavigableMap(); + z.or(x1); + z.and(x2); + return 
z; + } + + private static Roaring64NavigableMap andNot(Roaring64NavigableMap x1, Roaring64NavigableMap x2) { + Roaring64NavigableMap z = new Roaring64NavigableMap(); + z.or(x1); + z.andNot(x2); + return z; + } + + private static int andCardinality(Roaring64NavigableMap x1, Roaring64NavigableMap x2) { + return and(x1, x2).getIntCardinality(); + } + + private static int andNotCardinality(Roaring64NavigableMap x1, Roaring64NavigableMap x2) { + return andNot(x1, x2).getIntCardinality(); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java new file mode 100644 index 000000000000..739fc7bf6aaa --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java @@ -0,0 +1,52 @@ +package org.apache.solr.search.facet; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.function.IntFunction; + +import org.apache.lucene.queries.function.ValueSource; + +public class BitmapFrequencySlotAcc extends FuncSlotAcc { + private BitmapFrequencyCounter[] result; + private final int maxFrequency; + + public BitmapFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots, int maxFrequency) { + super(values, fcontext, numSlots); + + this.result = new BitmapFrequencyCounter[numSlots]; + this.maxFrequency = maxFrequency; + } + + @Override + public void collect(int doc, int slot, IntFunction slotContext) throws IOException { + if (result[slot] == null) { + result[slot] = new BitmapFrequencyCounter(this.maxFrequency); + } + result[slot].add(values.intVal(doc)); + } + + @Override + public int compare(int slotA, int slotB) { + throw new UnsupportedOperationException(); + } + + @Override + public Object getValue(int slotNum) { + if (result[slotNum] != null) { + return result[slotNum].serialize(); + } else { + return Collections.emptyList(); + } + } + + @Override + public void 
reset() { + Arrays.fill(result, null); + } + + @Override + public void resize(Resizer resizer) { + result = resizer.resize(result, null); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc64.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc64.java new file mode 100644 index 000000000000..26205e6558df --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc64.java @@ -0,0 +1,52 @@ +package org.apache.solr.search.facet; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.function.IntFunction; + +import org.apache.lucene.queries.function.ValueSource; + +public class BitmapFrequencySlotAcc64 extends FuncSlotAcc { + private BitmapFrequencyCounter64[] result; + private final int maxFrequency; + + public BitmapFrequencySlotAcc64(ValueSource values, FacetContext fcontext, int numSlots, int maxFrequency) { + super(values, fcontext, numSlots); + + this.result = new BitmapFrequencyCounter64[numSlots]; + this.maxFrequency = maxFrequency; + } + + @Override + public void collect(int doc, int slot, IntFunction slotContext) throws IOException { + if (result[slot] == null) { + result[slot] = new BitmapFrequencyCounter64(this.maxFrequency); + } + result[slot].add(values.longVal(doc)); + } + + @Override + public int compare(int slotA, int slotB) { + throw new UnsupportedOperationException(); + } + + @Override + public Object getValue(int slotNum) { + if (result[slotNum] != null) { + return result[slotNum].serialize(); + } else { + return Collections.emptyList(); + } + } + + @Override + public void reset() { + Arrays.fill(result, null); + } + + @Override + public void resize(Resizer resizer) { + result = resizer.resize(result, null); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java new file mode 100644 index 
000000000000..e2bb2c9c5180 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java @@ -0,0 +1,56 @@ +package org.apache.solr.search.facet; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.roaringbitmap.ImmutableBitmapDataProvider; +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.longlong.ImmutableLongBitmapDataProvider; +import org.roaringbitmap.longlong.Roaring64NavigableMap; + +public class BitmapUtil { + public static byte[] bitmapToBytes(ImmutableBitmapDataProvider bitmap) { + ByteBuffer buffer = ByteBuffer.allocate(bitmap.serializedSizeInBytes()); + bitmap.serialize(buffer); + return buffer.array(); + } + + public static RoaringBitmap bytesToBitmap(byte[] bytes) { + try { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.deserialize(ByteBuffer.wrap(bytes)); + return bitmap; + } catch (IOException ioe) { + throw new RuntimeException("Failed to deserialise RoaringBitmap from bytes", ioe); + } + } + + public static byte[] bitmapToBytes64(ImmutableLongBitmapDataProvider bitmap) { + try ( + ByteArrayOutputStream baos = new ByteArrayOutputStream((int) bitmap.serializedSizeInBytes()); + DataOutputStream dos = new DataOutputStream(baos) + ) { + bitmap.serialize(dos); + return baos.toByteArray(); + } catch (IOException ioe) { + throw new RuntimeException("Failed to serialise RoaringBitmap to bytes", ioe); + } + } + + public static Roaring64NavigableMap bytesToBitmap64(byte[] bytes) { + try ( + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + DataInputStream dis = new DataInputStream(bais) + ) { + Roaring64NavigableMap bitmap = new Roaring64NavigableMap(); + bitmap.deserialize(dis); + return bitmap; + } catch (IOException ioe) { + throw new RuntimeException("Failed to deserialise RoaringBitmap from bytes", ioe); + } + } +} diff --git 
a/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java new file mode 100644 index 000000000000..09d9ffc570ff --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java @@ -0,0 +1,97 @@ +package org.apache.solr.search.facet; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; + +/** + * Calculates the frequency-of-frequencies (number of values occurring x times) of ordinal values. + * + * The response is a map where the keys are frequencies (x = number of times a value occurred), and the values are + * the frequency-of-frequencies (number of values which occurred x times). + * + * Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting. 
+ */ +public class FrequencyOfFrequenciesAgg extends SimpleAggValueSource { + private final int size; + + public FrequencyOfFrequenciesAgg(ValueSource vs, Integer size) { + super("bitmapfreqfreq", vs); + + this.size = size; + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { + return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new Merger(size); + } + + public static class Parser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + ValueSource valueSource = fp.parseValueSource(); + + int size = 8; + if (fp.hasMoreArguments()) { + size = fp.parseInt(); + } + + return new FrequencyOfFrequenciesAgg(valueSource, size); + } + } + + private static class Merger extends FacetMerger { + private final int size; + private BitmapFrequencyCounter result; + + public Merger(int size) { + this.size = size; + this.result = new BitmapFrequencyCounter(size); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + if (facetResult instanceof SimpleOrderedMap) { + BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size); + deserialized.deserialize((SimpleOrderedMap) facetResult); + + result = result.merge(deserialized); + } + } + + @Override + public void finish(Context mcontext) { + // never called + } + + @Override + public Object getMergedResult() { + Map map = new LinkedHashMap<>(); + + result.normalize(); + + int[] lowFrequencies = result.decode(); + for (int i = 0; i < lowFrequencies.length; i++) { + int value = lowFrequencies[i]; + if (value > 0) { + map.put(i, value); + } + } + + result.getOverflow() + .forEach((value, freq) -> map.merge(freq, 1, Integer::sum)); + + return map; + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg64.java 
b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg64.java new file mode 100644 index 000000000000..43fa223109e1 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg64.java @@ -0,0 +1,97 @@ +package org.apache.solr.search.facet; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; + +/** + * Calculates the frequency-of-frequencies (number of values occurring x times) of ordinal values. + * + * The response is a map where the keys are frequencies (x = number of times a value occurred), and the values are + * the frequency-of-frequencies (number of values which occurred x times). + * + * Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting. 
+ */ +public class FrequencyOfFrequenciesAgg64 extends SimpleAggValueSource { + private final int size; + + public FrequencyOfFrequenciesAgg64(ValueSource vs, Integer size) { + super("bitmapfreqfreq64", vs); + + this.size = size; + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { + return new BitmapFrequencySlotAcc64(getArg(), fcontext, numSlots, size); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new Merger(size); + } + + public static class Parser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + ValueSource valueSource = fp.parseValueSource(); + + int size = 8; + if (fp.hasMoreArguments()) { + size = fp.parseInt(); + } + + return new FrequencyOfFrequenciesAgg64(valueSource, size); + } + } + + private static class Merger extends FacetMerger { + private final int size; + private BitmapFrequencyCounter64 result; + + public Merger(int size) { + this.size = size; + this.result = new BitmapFrequencyCounter64(size); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + if (facetResult instanceof SimpleOrderedMap) { + BitmapFrequencyCounter64 deserialized = new BitmapFrequencyCounter64(size); + deserialized.deserialize((SimpleOrderedMap) facetResult); + + result = result.merge(deserialized); + } + } + + @Override + public void finish(Context mcontext) { + // never called + } + + @Override + public Object getMergedResult() { + Map map = new LinkedHashMap<>(); + + result.normalize(); + + int[] lowFrequencies = result.decode(); + for (int i = 0; i < lowFrequencies.length; i++) { + int value = lowFrequencies[i]; + if (value > 0) { + map.put(i, value); + } + } + + result.getOverflow() + .forEach((value, freq) -> map.merge(freq, 1, Integer::sum)); + + return map; + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/TermFrequencyCounter.java 
b/solr/core/src/java/org/apache/solr/search/facet/TermFrequencyCounter.java new file mode 100644 index 000000000000..6831dadea799 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/TermFrequencyCounter.java @@ -0,0 +1,47 @@ +package org.apache.solr.search.facet; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.common.util.SimpleOrderedMap; + +public class TermFrequencyCounter { + private final Map counters; + + public TermFrequencyCounter() { + this.counters = new HashMap<>(); + } + + public Map getCounters() { + return this.counters; + } + + public void add(String value) { + counters.merge(value, 1, Integer::sum); + } + + public SimpleOrderedMap serialize() { + SimpleOrderedMap serialized = new SimpleOrderedMap<>(); + + if (!counters.isEmpty()) { + serialized.add("counters", counters); + } + + return serialized; + } + + public void deserialize(SimpleOrderedMap serialized) { + Map overflow = (Map) serialized.get("counters"); + if (overflow != null) { + this.counters.putAll(overflow); + } + } + + public TermFrequencyCounter merge(TermFrequencyCounter other) { + other.counters.forEach((value, freq) -> { + counters.merge(value, freq, Integer::sum); + }); + + return this; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/TermFrequencyOfFrequenciesAgg.java b/solr/core/src/java/org/apache/solr/search/facet/TermFrequencyOfFrequenciesAgg.java new file mode 100644 index 000000000000..1648f9873c73 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/TermFrequencyOfFrequenciesAgg.java @@ -0,0 +1,66 @@ +package org.apache.solr.search.facet; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.search.FunctionQParser; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.ValueSourceParser; + +public class TermFrequencyOfFrequenciesAgg extends 
SimpleAggValueSource { + public TermFrequencyOfFrequenciesAgg(ValueSource vs) { + super("termfreqfreq", vs); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { + return new TermFrequencySlotAcc(getArg(), fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new Merger(); + } + + public static class Parser extends ValueSourceParser { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new TermFrequencyOfFrequenciesAgg(fp.parseValueSource()); + } + } + + private static class Merger extends FacetMerger { + private TermFrequencyCounter result; + + public Merger() { + this.result = new TermFrequencyCounter(); + } + + @Override + public void merge(Object facetResult, Context mcontext) { + if (facetResult instanceof SimpleOrderedMap) { + TermFrequencyCounter deserialized = new TermFrequencyCounter(); + deserialized.deserialize((SimpleOrderedMap) facetResult); + + result = result.merge(deserialized); + } + } + + @Override + public void finish(Context mcontext) { + // never called + } + + @Override + public Object getMergedResult() { + Map map = new LinkedHashMap<>(); + + result.getCounters() + .forEach((value, freq) -> map.merge(freq, 1, Integer::sum)); + + return map; + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/TermFrequencySlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/TermFrequencySlotAcc.java new file mode 100644 index 000000000000..3796ce7aa71b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/TermFrequencySlotAcc.java @@ -0,0 +1,50 @@ +package org.apache.solr.search.facet; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.function.IntFunction; + +import org.apache.lucene.queries.function.ValueSource; + +public class TermFrequencySlotAcc extends FuncSlotAcc { + private TermFrequencyCounter[] result; + + public 
TermFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots) { + super(values, fcontext, numSlots); + + this.result = new TermFrequencyCounter[numSlots]; + } + + @Override + public void collect(int doc, int slot, IntFunction slotContext) throws IOException { + if (result[slot] == null) { + result[slot] = new TermFrequencyCounter(); + } + result[slot].add(values.strVal(doc)); + } + + @Override + public int compare(int slotA, int slotB) { + throw new UnsupportedOperationException(); + } + + @Override + public Object getValue(int slotNum) { + if (result[slotNum] != null) { + return result[slotNum].serialize(); + } else { + return Collections.emptyList(); + } + } + + @Override + public void reset() { + Arrays.fill(result, null); + } + + @Override + public void resize(Resizer resizer) { + result = resizer.resize(result, null); + } +} diff --git a/solr/core/src/test/org/apache/solr/search/facet/BitmapFrequencyCounterTest.java b/solr/core/src/test/org/apache/solr/search/facet/BitmapFrequencyCounterTest.java new file mode 100644 index 000000000000..d33395f698ae --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/facet/BitmapFrequencyCounterTest.java @@ -0,0 +1,508 @@ +package org.apache.solr.search.facet; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.common.util.JavaBinCodec; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.junit.Test; +import org.roaringbitmap.RoaringBitmap; + +public class BitmapFrequencyCounterTest extends LuceneTestCase { + private static final int TEST_ORDINAL = 5; + + @Test + public void testAddValue() { + int iters = 10 * RANDOM_MULTIPLIER; + + for (int i = 0; i < iters; i++) { + int size = random().nextInt(8); + + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(size); + + int 
numValues = random().nextInt(100); + Map values = new HashMap<>(); + for (int j = 0; j < numValues; j++) { + int value = random().nextInt(); + int count = random().nextInt(256); + + addCount(counter, value, count); + + values.put(value, count); + } + + values.forEach((value, count) -> assertCount(counter, value, count)); + + counter.normalize(); + + values.forEach((value, count) -> assertCountNormalized(counter, value, count)); + } + } + + @Test + public void testMerge() { + int iters = 10 * RANDOM_MULTIPLIER; + + for (int i = 0; i < iters; i++) { + int size = random().nextInt(8); + + BitmapFrequencyCounter x = new BitmapFrequencyCounter(size); + + int numXValues = random().nextInt(100); + Map xValues = new HashMap<>(); + for (int j = 0; j < numXValues; j++) { + int value = random().nextInt(); + int count = random().nextInt(256); + + addCount(x, value, count); + + xValues.put(value, count); + } + + xValues.forEach((value, count) -> assertCount(x, value, count)); + + BitmapFrequencyCounter y = new BitmapFrequencyCounter(size); + + int numYValues = random().nextInt(100); + Map yValues = new HashMap<>(); + for (int j = 0; j < numYValues; j++) { + int value = random().nextInt(); + int count = random().nextInt(256); + + addCount(y, value, count); + + yValues.put(value, count); + } + + yValues.forEach((value, count) -> assertCount(y, value, count)); + + if (random().nextBoolean()) { + x.normalize(); + } + + if (random().nextBoolean()) { + y.normalize(); + } + + BitmapFrequencyCounter merged = x.merge(y); + + yValues.forEach((value, count) -> xValues.merge(value, count, Integer::sum)); + + xValues.forEach((value, count) -> assertCount(merged, value, count)); + } + } + + @Test(expected = NegativeArraySizeException.class) + public void givenNegativeSize_whenConstructingCounter() { + new BitmapFrequencyCounter(-1); + } + + @Test + public void givenSize0_whenAddingValue_withFrequency1() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(0); + + addCount(counter, 
TEST_ORDINAL, 1); + + assertCount(counter, TEST_ORDINAL, 1); + + counter.normalize(); + + assertCountNormalized(counter, TEST_ORDINAL, 1); + } + + @Test + public void givenSize0_whenAddingValue_withFrequency2() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(0); + + addCount(counter, TEST_ORDINAL, 2); + + assertCount(counter, TEST_ORDINAL, 2); + + counter.normalize(); + + assertCountNormalized(counter, TEST_ORDINAL, 2); + } + + @Test + public void givenSize1_whenAddingValue_withFrequency1() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(1); + + addCount(counter, TEST_ORDINAL, 1); + + assertCount(counter, TEST_ORDINAL, 1); + + counter.normalize(); + + assertCountNormalized(counter, TEST_ORDINAL, 1); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 2); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 1); + } + + @Test + public void givenSize1_whenAddingValue_withFrequency2() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(1); + + addCount(counter, TEST_ORDINAL, 2); + + assertCount(counter, TEST_ORDINAL, 2); + + counter.normalize(); + + assertCountNormalized(counter, TEST_ORDINAL, 2); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 2); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + } + + @Test + public void givenSize2_whenAddingValue_withFrequency1() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + addCount(counter, TEST_ORDINAL, 1); + + assertCount(counter, TEST_ORDINAL, 1); + + counter.normalize(); + + assertCountNormalized(counter, TEST_ORDINAL, 1); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 2); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 1); + } + + @Test + public void givenSize2_whenAddingValue_withFrequency2() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + addCount(counter, TEST_ORDINAL, 2); + + assertCount(counter, TEST_ORDINAL, 2); + + 
counter.normalize(); + + assertCountNormalized(counter, TEST_ORDINAL, 2); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 4); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 1); + assertEquals(decoded[3], 0); + } + + @Test + public void givenSize2_whenAddingValue_withFrequency3() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + addCount(counter, TEST_ORDINAL, 3); + + assertCount(counter, TEST_ORDINAL, 3); + + counter.normalize(); + + assertCountNormalized(counter, TEST_ORDINAL, 3); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 4); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 0); + assertEquals(decoded[3], 1); + } + + @Test + public void givenSize2_whenAddingValue_withFrequency4() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + addCount(counter, TEST_ORDINAL, 4); + + assertCount(counter, TEST_ORDINAL, 4); + + counter.normalize(); + + assertCountNormalized(counter, TEST_ORDINAL, 4); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 4); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 0); + assertEquals(decoded[3], 0); + } + + @Test + public void givenSize2_whenAddingMultipleValues() { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + addCount(counter, 101, 1); + addCount(counter, 102, 2); + addCount(counter, 202, 2); + addCount(counter, 103, 3); + addCount(counter, 203, 3); + addCount(counter, 303, 3); + + assertCount(counter, 101, 1); + assertCount(counter, 102, 2); + assertCount(counter, 202, 2); + assertCount(counter, 103, 3); + assertCount(counter, 203, 3); + assertCount(counter, 303, 3); + + counter.normalize(); + + assertCountNormalized(counter, 101, 1); + assertCountNormalized(counter, 102, 2); + assertCountNormalized(counter, 202, 2); + assertCountNormalized(counter, 103, 3); + 
assertCountNormalized(counter, 203, 3); + assertCountNormalized(counter, 303, 3); + + int[] decoded = counter.decode(); + + assertEquals(decoded.length, 4); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 1); + assertEquals(decoded[2], 2); + assertEquals(decoded[3], 3); + } + + @Test + public void givenSize2_whenMergingNonnormalizedValues() { + BitmapFrequencyCounter x = new BitmapFrequencyCounter(2); + BitmapFrequencyCounter y = new BitmapFrequencyCounter(2); + + addCount(x, TEST_ORDINAL, 2); + addCount(y, TEST_ORDINAL, 2); + + assertCount(x, TEST_ORDINAL, 2); + assertCount(y, TEST_ORDINAL, 2); + + x = x.merge(y); + + assertCount(x, TEST_ORDINAL, 4); + } + + @Test + public void givenSize2_whenMergingNormalizedValues() { + BitmapFrequencyCounter x = new BitmapFrequencyCounter(2); + BitmapFrequencyCounter y = new BitmapFrequencyCounter(2); + + addCount(x, TEST_ORDINAL, 2); + addCount(y, TEST_ORDINAL, 2); + + assertCount(x, TEST_ORDINAL, 2); + assertCount(y, TEST_ORDINAL, 2); + + x.normalize(); + y.normalize(); + + assertCountNormalized(x, TEST_ORDINAL, 2); + assertCountNormalized(y, TEST_ORDINAL, 2); + + x = x.merge(y); + + assertCount(x, TEST_ORDINAL, 4); + } + + @Test + public void givenSize4_whenMergingNonnormalizedValues() { + BitmapFrequencyCounter x = new BitmapFrequencyCounter(4); + BitmapFrequencyCounter y = new BitmapFrequencyCounter(4); + + addCount(x, TEST_ORDINAL, 10); + addCount(y, TEST_ORDINAL, 5); + + assertCount(x, TEST_ORDINAL, 10); + assertCount(y, TEST_ORDINAL, 5); + + x = x.merge(y); + + assertCount(x, TEST_ORDINAL, 15); + + x.normalize(); + + int[] decoded = x.decode(); + + assertEquals(decoded.length, 16); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 0); + assertEquals(decoded[3], 0); + assertEquals(decoded[4], 0); + assertEquals(decoded[5], 0); + assertEquals(decoded[6], 0); + assertEquals(decoded[7], 0); + assertEquals(decoded[8], 0); + assertEquals(decoded[9], 0); + 
assertEquals(decoded[10], 0); + assertEquals(decoded[11], 0); + assertEquals(decoded[12], 0); + assertEquals(decoded[13], 0); + assertEquals(decoded[14], 0); + assertEquals(decoded[15], 1); + } + + @Test + public void givenSize4_whenMergingNormalizedValues() { + BitmapFrequencyCounter x = new BitmapFrequencyCounter(4); + BitmapFrequencyCounter y = new BitmapFrequencyCounter(4); + + addCount(x, TEST_ORDINAL, 10); + addCount(y, TEST_ORDINAL, 5); + + assertCount(x, TEST_ORDINAL, 10); + assertCount(y, TEST_ORDINAL, 5); + + x.normalize(); + y.normalize(); + + assertCountNormalized(x, TEST_ORDINAL, 10); + assertCountNormalized(y, TEST_ORDINAL, 5); + + x = x.merge(y); + + assertCount(x, TEST_ORDINAL, 15); + + x.normalize(); + + int[] decoded = x.decode(); + + assertEquals(decoded.length, 16); + assertEquals(decoded[0], 0); + assertEquals(decoded[1], 0); + assertEquals(decoded[2], 0); + assertEquals(decoded[3], 0); + assertEquals(decoded[4], 0); + assertEquals(decoded[5], 0); + assertEquals(decoded[6], 0); + assertEquals(decoded[7], 0); + assertEquals(decoded[8], 0); + assertEquals(decoded[9], 0); + assertEquals(decoded[10], 0); + assertEquals(decoded[11], 0); + assertEquals(decoded[12], 0); + assertEquals(decoded[13], 0); + assertEquals(decoded[14], 0); + assertEquals(decoded[15], 1); + } + + @Test + public void testSerialization() throws IOException { + BitmapFrequencyCounter counter = new BitmapFrequencyCounter(2); + + addCount(counter, 101, 1); + addCount(counter, 102, 2); + addCount(counter, 202, 2); + addCount(counter, 103, 3); + addCount(counter, 203, 3); + addCount(counter, 303, 3); + + counter.normalize(); + + JavaBinCodec codec = new JavaBinCodec(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + codec.marshal(counter.serialize(), out); + + InputStream in = new ByteArrayInputStream(out.toByteArray()); + counter = new BitmapFrequencyCounter(2); + counter.deserialize((SimpleOrderedMap) codec.unmarshal(in)); + + assertCount(counter, 101, 1); + 
assertCount(counter, 102, 2); + assertCount(counter, 202, 2); + assertCount(counter, 103, 3); + assertCount(counter, 203, 3); + assertCount(counter, 303, 3); + } + + private static void addCount(BitmapFrequencyCounter counter, int value, int count) { + for (int i = 0; i < count; i++) { + counter.add(value); + } + } + + private static void assertCount(BitmapFrequencyCounter counter, int value, int count) { + RoaringBitmap[] bitmaps = counter.getBitmaps(); + + if (count >= (1 << bitmaps.length)) { + int overflowCount = count; + for (int i = 0; i < bitmaps.length; i++) { + if (bitmaps[i] != null && bitmaps[i].contains(value)) { + overflowCount -= 1 << i; + } + } + + assertEquals( + "Overflow should contain value " + value + " with overflow count " + overflowCount + " (for count " + count + ")", + (int) counter.getOverflow().getOrDefault(value, 0), overflowCount + ); + } else { + for (int i = 0; i < bitmaps.length; i++) { + if (((count >> i) & 1) == 1) { + assertTrue( + "bitmap " + i + " should contain value " + value + " (for count " + count + ")", + bitmaps[i].contains(value) + ); + } else if (bitmaps[i] != null) { + assertFalse( + "bitmap " + i + " should not contain value " + value + " (for count " + count + ")", + bitmaps[i].contains(value) + ); + } + } + } + } + + private static void assertCountNormalized(BitmapFrequencyCounter counter, int value, int count) { + RoaringBitmap[] bitmaps = counter.getBitmaps(); + + if (count >= (1 << bitmaps.length)) { + for (int i = 0; i < bitmaps.length; i++) { + if (bitmaps[i] != null) { + assertFalse( + "bitmap " + i + " should not contain value " + value + " (for count " + count + ")", + bitmaps[i].contains(value) + ); + } + } + + assertEquals( + "Overflow should contain value " + value + " (for count " + count + ")", + (int) counter.getOverflow().get(value), count + ); + } else { + for (int i = 0; i < bitmaps.length; i++) { + if (((count >> i) & 1) == 1) { + assertTrue( + "bitmap " + i + " should contain value " + value + " 
(for count " + count + ")", + bitmaps[i].contains(value) + ); + } else if (bitmaps[i] != null) { + assertFalse( + "bitmap " + i + " should not contain value " + value + " (for count " + count + ")", + bitmaps[i].contains(value) + ); + } + } + } + } +} diff --git a/solr/core/src/test/org/apache/solr/update/processor/AbstractAtomicUpdatesMultivalueTestBase.java b/solr/core/src/test/org/apache/solr/update/processor/AbstractAtomicUpdatesMultivalueTestBase.java index c5ce9a478e45..32850719bc97 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/AbstractAtomicUpdatesMultivalueTestBase.java +++ b/solr/core/src/test/org/apache/solr/update/processor/AbstractAtomicUpdatesMultivalueTestBase.java @@ -16,9 +16,6 @@ */ package org.apache.solr.update.processor; -import static org.hamcrest.CoreMatchers.hasItems; -import static org.hamcrest.CoreMatchers.not; - import java.io.IOException; import java.time.ZonedDateTime; import java.util.Arrays; @@ -29,7 +26,8 @@ import java.util.function.Function; import java.util.stream.Collectors; -import org.apache.curator.shaded.com.google.common.collect.Lists; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import org.apache.solr.EmbeddedSolrServerTestBase; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; @@ -40,7 +38,8 @@ import org.junit.Ignore; import org.junit.Test; -import com.google.common.collect.ImmutableMap; +import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.CoreMatchers.not; public abstract class AbstractAtomicUpdatesMultivalueTestBase extends EmbeddedSolrServerTestBase { diff --git a/solr/test-framework/src/java/org/apache/solr/EmbeddedSolrServerTestBase.java b/solr/test-framework/src/java/org/apache/solr/EmbeddedSolrServerTestBase.java new file mode 100644 index 000000000000..8df8dea8ebf6 --- /dev/null +++ 
b/solr/test-framework/src/java/org/apache/solr/EmbeddedSolrServerTestBase.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.io.FileUtils; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.common.util.ContentStream; +import org.apache.solr.common.util.ContentStreamBase; +import org.apache.solr.common.util.ContentStreamBase.ByteArrayStream; +import org.apache.solr.util.ExternalPaths; +import org.junit.After; +import org.junit.AfterClass; + +import com.google.common.io.ByteStreams; + +abstract public class EmbeddedSolrServerTestBase extends SolrTestCaseJ4 { + + protected static final String DEFAULT_CORE_NAME = 
"collection1"; + + public static EmbeddedSolrServer client = null; + + @After + public synchronized void afterClass() throws Exception { + if (client != null) client.close(); + client = null; + } + + @AfterClass + public static void afterEmbeddedSolrServerTestBase() throws Exception { + + } + + public synchronized EmbeddedSolrServer getSolrClient() { + if (client == null) { + client = createNewSolrClient(); + } + return client; + } + + /** + * Create a new solr client. Subclasses should override for other options. + */ + public EmbeddedSolrServer createNewSolrClient() { + return new EmbeddedSolrServer(h.getCoreContainer(), DEFAULT_CORE_NAME) { + @Override + public void close() { + // do not close core container + } + }; + } + + public void upload(final String collection, final ContentStream... contents) { + final Path base = Paths.get(getSolrClient().getCoreContainer().getSolrHome(), collection); + writeTo(base, contents); + } + + private void writeTo(final Path base, final ContentStream... contents) { + try { + if (!Files.exists(base)) { + Files.createDirectories(base); + } + + for (final ContentStream content : contents) { + final File file = new File(base.toFile(), content.getName()); + file.getParentFile().mkdirs(); + + try (OutputStream os = new FileOutputStream(file)) { + ByteStreams.copy(content.getStream(), os); + } + } + } catch (final IOException e) { + throw new RuntimeException(e); + } + } + + public Collection download(final String collection, final String... 
names) { + final Path base = Paths.get(getSolrClient().getCoreContainer().getSolrHome(), collection); + final List result = new ArrayList<>(); + + if (Files.exists(base)) { + for (final String name : names) { + final File file = new File(base.toFile(), name); + if (file.exists() && file.canRead()) { + try { + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + ByteStreams.copy(new FileInputStream(file), os); + final ByteArrayStream stream = new ContentStreamBase.ByteArrayStream(os.toByteArray(), name); + result.add(stream); + } catch (final IOException e) { + throw new RuntimeException(e); + } + } + } + } + + return result; + } + + public static void initCore() throws Exception { + final String home = legacyExampleCollection1SolrHome(); + final String config = home + "/" + DEFAULT_CORE_NAME + "/conf/solrconfig.xml"; + final String schema = home + "/" + DEFAULT_CORE_NAME + "/conf/schema.xml"; + initCore(config, schema, home); + } + + public static String legacyExampleCollection1SolrHome() throws IOException { + final String sourceHome = ExternalPaths.SOURCE_HOME; + if (sourceHome == null) + throw new IllegalStateException("No source home! Cannot create the legacy example solr home directory."); + + final File tempSolrHome = LuceneTestCase.createTempDir().toFile(); + FileUtils.copyFileToDirectory(new File(sourceHome, "server/solr/solr.xml"), tempSolrHome); + final File collectionDir = new File(tempSolrHome, DEFAULT_CORE_NAME); + FileUtils.forceMkdir(collectionDir); + final File configSetDir = new File(sourceHome, "server/solr/configsets/sample_techproducts_configs/conf"); + FileUtils.copyDirectoryToDirectory(configSetDir, collectionDir); + + final Properties props = new Properties(); + props.setProperty("name", DEFAULT_CORE_NAME); + + try (Writer writer = new OutputStreamWriter(FileUtils.openOutputStream(new File(collectionDir, "core.properties")), + "UTF-8");) { + props.store(writer, null); + } + + return tempSolrHome.getAbsolutePath(); + } + +}