diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
index c1fa05f580ea..e122eced3b59 100644
--- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
@@ -57,14 +57,16 @@
 import org.apache.solr.search.facet.AggValueSource;
 import org.apache.solr.search.facet.AvgAgg;
 import org.apache.solr.search.facet.BitmapCollectorAgg;
+import org.apache.solr.search.facet.BitmapFrequencyAgg;
 import org.apache.solr.search.facet.CountAgg;
+import org.apache.solr.search.facet.FrequencyOfFrequenciesAgg;
 import org.apache.solr.search.facet.HLLAgg;
 import org.apache.solr.search.facet.MinMaxAgg;
 import org.apache.solr.search.facet.PercentileAgg;
+import org.apache.solr.search.facet.RelatednessAgg;
 import org.apache.solr.search.facet.StddevAgg;
 import org.apache.solr.search.facet.SumAgg;
 import org.apache.solr.search.facet.SumsqAgg;
-import org.apache.solr.search.facet.RelatednessAgg;
 import org.apache.solr.search.facet.TopDocsAgg;
 import org.apache.solr.search.facet.UniqueAgg;
 import org.apache.solr.search.facet.UniqueBlockAgg;
@@ -1059,6 +1061,10 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {
 
     addParser("agg_bitmapcollector", new BitmapCollectorAgg.Parser());
 
+    addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser());
+
+    addParser("agg_bitmapfreqfreq", new FrequencyOfFrequenciesAgg.Parser());
+
     addParser("childfield", new ChildFieldValueSourceParser());
   }
 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java
index f107582ee429..3259af4f7707 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapCollectorAgg.java
@@ -1,7 +1,5 @@
 package org.apache.solr.search.facet;
 
-import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
@@ -73,7 +71,7 @@ public Object getValue(int slotNum) {
       byte[] serialised;
       if (result[slotNum] != null) {
         result[slotNum].runOptimize();
-        serialised = bitmapToBytes(result[slotNum]);
+        serialised = BitmapUtil.bitmapToBytes(result[slotNum]);
       } else {
         serialised = new byte[0];
       }
@@ -116,20 +114,9 @@ public void finish(Context mcontext) {
     public Object getMergedResult() {
       combined.runOptimize();
       SimpleOrderedMap map = new SimpleOrderedMap();
-      map.add(KEY, bitmapToBytes(combined));
+      map.add(KEY, BitmapUtil.bitmapToBytes(combined));
       return map;
     }
   }
 
-  private static byte[] bitmapToBytes(MutableRoaringBitmap bitmap) {
-    ByteArrayOutputStream bos = new ByteArrayOutputStream();
-    DataOutputStream dos = new DataOutputStream(bos);
-    try {
-      bitmap.serialize(dos);
-      dos.close();
-      return bos.toByteArray();
-    } catch (IOException ioe) {
-      throw new RuntimeException("Failed to serialise RoaringBitmap to bytes", ioe);
-    }
-  }
 }
diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java
new file mode 100644
index 000000000000..cda7afb0ee80
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java
@@ -0,0 +1,99 @@
+package org.apache.solr.search.facet;
+
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.search.FunctionQParser;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.search.ValueSourceParser;
+
+/**
+ * Calculates the frequency of ordinal values using Roaring Bitmaps.
+ *
+ * <p>The response is a map with the following fields:
+ * <ul>
+ *   <li>{@code bitmaps}: an array of bitmaps, where the frequency of a value x is given by the sum of {@code 2^i}
+ *       for all values of {@code i} where {@code bitmaps[i].contains(x)}</li>
+ *   <li>{@code overflow}: a bitmap of ordinal values with {@code frequency >= 2^(bitmaps.length)}</li>
+ * </ul>
+ *
+ * <p>Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting.
+ */
+public class BitmapFrequencyAgg extends SimpleAggValueSource {
+  /** Number of bitmaps each {@link BitmapFrequencyCounter} is created with. */
+  private final int size;
+
+  /**
+   * @param vs the source of the values to count
+   * @param size the number of bitmaps each counter is created with
+   */
+  public BitmapFrequencyAgg(ValueSource vs, int size) {
+    super("bitmapfreq", vs);
+
+    this.size = size;
+  }
+
+  @Override
+  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
+    return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size);
+  }
+
+  @Override
+  public FacetMerger createFacetMerger(Object prototype) {
+    return new Merger(size);
+  }
+
+  /** Parses {@code agg_bitmapfreq(valueSource[, size])}; {@code size} defaults to 16. */
+  public static class Parser extends ValueSourceParser {
+    private static final int DEFAULT_SIZE = 16;
+
+    @Override
+    public ValueSource parse(FunctionQParser fp) throws SyntaxError {
+      ValueSource valueSource = fp.parseValueSource();
+
+      int size = DEFAULT_SIZE;
+      if (fp.hasMoreArguments()) {
+        size = fp.parseInt();
+      }
+
+      // Fail at parse time rather than while collecting: a negative size cannot back a bitmap array, and with
+      // zero bitmaps every value would go straight to overflow.
+      if (size < 1) {
+        throw new SyntaxError("bitmapfreq size must be at least 1, got: " + size);
+      }
+
+      return new BitmapFrequencyAgg(valueSource, size);
+    }
+  }
+
+  /** Combines serialized counters by adding their frequencies together. */
+  private static class Merger extends FacetMerger {
+    private final int size;
+    private BitmapFrequencyCounter result;
+
+    public Merger(int size) {
+      this.size = size;
+      this.result = new BitmapFrequencyCounter(size);
+    }
+
+    @Override
+    public void merge(Object facetResult, Context mcontext) {
+      // Anything that is not a map (such as the empty list emitted for an empty slot) carries no counts.
+      if (facetResult instanceof SimpleOrderedMap) {
+        BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size);
+        deserialized.deserialize((SimpleOrderedMap) facetResult);
+
+        result = result.merge(deserialized);
+      }
+    }
+
+    @Override
+    public void finish(Context mcontext) {
+      // never called
+    }
+
+    @Override
+    public Object getMergedResult() {
+      return result.serialize();
+    }
+  }
+}
diff --git
a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java
new file mode 100644
index 000000000000..343b813711f2
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java
@@ -0,0 +1,258 @@
+package org.apache.solr.search.facet;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.roaringbitmap.RoaringBitmap;
+
+/**
+ * Counts frequencies of ordinal values using Roaring Bitmaps.
+ *
+ * <p>Each frequency is stored as a binary number spread across the bitmaps: bitmap {@code i} contains the values
+ * whose frequency has bit {@code i} set. Values whose frequency no longer fits are recorded in the overflow set.
+ */
+public class BitmapFrequencyCounter {
+  private final RoaringBitmap[] bitmaps;
+  private RoaringBitmap overflow;
+
+  /**
+   * Constructs a new frequency counter. The maximum countable frequency will be given by {@code (2^size)-1}.
+   *
+   * @param size The maximum size of the frequencies list
+   * @throws IllegalArgumentException if {@code size} is negative
+   */
+  public BitmapFrequencyCounter(int size) {
+    if (size < 0) {
+      throw new IllegalArgumentException("size must not be negative: " + size);
+    }
+
+    this.bitmaps = new RoaringBitmap[size];
+  }
+
+  /**
+   * An array of bitmaps encoding frequencies of values: the frequency of a value x is given by the sum of {@code 2^i}
+   * for all values of {@code i} where {@code bitmaps[i].contains(x)}.
+   *
+   * <p>The returned array is the counter's own storage, not a copy; unused positions are {@code null}.
+   *
+   * @return The encoded frequencies
+   */
+  public RoaringBitmap[] getBitmaps() {
+    return this.bitmaps;
+  }
+
+  /**
+   * The overflow set of all values with {@code frequency >= 2^(bitmaps.length)}.
+   *
+   * @return The overflow set, or {@code null} if no value has overflowed
+   */
+  public RoaringBitmap getOverflow() {
+    return this.overflow;
+  }
+
+  /**
+   * Adds one occurrence of the given value to the counter.
+   *
+   * @param value The value to add
+   */
+  public void add(int value) {
+    // This is just binary addition x+1=y - we carry the value till we find an empty column
+    for (int i = 0; i < bitmaps.length; i++) {
+      RoaringBitmap bitmap = bitmaps[i];
+      if (bitmap == null) {
+        bitmap = bitmaps[i] = new RoaringBitmap();
+      }
+
+      if (!bitmap.contains(value)) {
+        bitmap.add(value);
+        return;
+      }
+
+      bitmap.remove(value);
+    }
+
+    // If we reach this point, the frequency of this value is >= 2^(bitmaps.length)
+
+    if (overflow == null) {
+      overflow = new RoaringBitmap();
+    }
+
+    overflow.add(value);
+  }
+
+  /**
+   * Serializes the counter.
+   *
+   * <p>The {@code bitmaps} entry is omitted when the first bitmap has never been populated, and the
+   * {@code overflow} entry is omitted when there is no overflow set. The {@code bitmaps} array always has the
+   * counter's full size; positions from the first unused bitmap onwards are left {@code null}.
+   *
+   * @return The serialized data
+   */
+  public SimpleOrderedMap<Object> serialize() {
+    SimpleOrderedMap<Object> serialized = new SimpleOrderedMap<>();
+
+    byte[][] serializedBitmaps = new byte[bitmaps.length][];
+
+    int i = 0;
+    while (i < bitmaps.length) {
+      RoaringBitmap bitmap = bitmaps[i];
+      if (bitmap == null) {
+        break;
+      }
+
+      bitmap.runOptimize();
+      serializedBitmaps[i] = BitmapUtil.bitmapToBytes(bitmap);
+
+      i++;
+    }
+
+    if (i > 0) {
+      serialized.add("bitmaps", serializedBitmaps);
+    }
+
+    if (overflow != null) {
+      overflow.runOptimize();
+      serialized.add("overflow", BitmapUtil.bitmapToBytes(overflow));
+    }
+
+    return serialized;
+  }
+
+  /**
+   * Populates the counter from the given serialized data.
+   *
+   * <p>The counter must be fresh (with no values previously added), and have the same size as the counter from which
+   * the serialized data was generated.
+   *
+   * @param serialized The serialized data
+   */
+  public void deserialize(SimpleOrderedMap<?> serialized) {
+    Object serializedBitmaps = serialized.get("bitmaps");
+
+    // serialize() emits a byte[][]. NOTE(review): a transport that decodes arrays as lists (presumably JavaBin)
+    // would hand the same data back as a List of byte[] - confirm; both shapes are accepted here.
+    List<?> entries = null;
+    if (serializedBitmaps instanceof byte[][]) {
+      entries = Arrays.asList((byte[][]) serializedBitmaps);
+    } else if (serializedBitmaps instanceof List) {
+      entries = (List<?>) serializedBitmaps;
+    } else if (serializedBitmaps != null) {
+      throw new IllegalArgumentException(
+          "Unexpected type for serialized bitmaps: " + serializedBitmaps.getClass().getName());
+    }
+
+    if (entries != null) {
+      int limit = Math.min(bitmaps.length, entries.size());
+      for (int i = 0; i < limit; i++) {
+        Object entry = entries.get(i);
+        if (entry == null) {
+          // serialize() stops at the first unused bitmap and leaves the rest of the array null
+          break;
+        }
+
+        bitmaps[i] = BitmapUtil.bytesToBitmap((byte[]) entry);
+      }
+    }
+
+    byte[] serializedOverflow = (byte[]) serialized.get("overflow");
+    if (serializedOverflow != null) {
+      this.overflow = BitmapUtil.bytesToBitmap(serializedOverflow);
+    } else {
+      this.overflow = null;
+    }
+  }
+
+  /**
+   * Merges this counter with another (in-place).
+   *
+   * <p>The other counter must have the same size as this counter. After this operation, the returned counter will
+   * contain the values from both counters with their frequencies added together, and references to either of the
+   * original counters should be discarded (since either may now be invalid, and one will have been modified and
+   * returned).
+   *
+   * @param other The counter to merge in
+   * @return The merged counter
+   * @throws IllegalArgumentException if the counters have different sizes
+   */
+  public BitmapFrequencyCounter merge(BitmapFrequencyCounter other) {
+    if (other.bitmaps.length != bitmaps.length) {
+      throw new IllegalArgumentException(
+          "Cannot merge counters of different sizes: " + bitmaps.length + " and " + other.bitmaps.length);
+    }
+
+    // The algorithm here is a ripple-carry adder in two dimensions, built from half-adders that are adapted from the
+    // standard (where s is the sum, and c the carried value):
+    //
+    //   s = x xor y
+    //   c = x and y
+    //
+    // to:
+    //
+    //   s = x xor y
+    //   c = y andnot s
+    //
+    // which allows in-place modification of bitmaps (x modified into s, y modified into c). A missing (null) bitmap
+    // stands for the empty set, so a column is left untouched only when y and the incoming carry are both empty.
+
+    RoaringBitmap carry = null;
+
+    for (int i = 0; i < bitmaps.length; i++) {
+      RoaringBitmap x = bitmaps[i];
+      RoaringBitmap y = other.bitmaps[i];
+      RoaringBitmap carryOut = null;
+
+      if (y != null) {
+        if (x == null) {
+          // 0 + y = y with nothing carried: adopt the other counter's bitmap
+          x = bitmaps[i] = y;
+        } else {
+          x.xor(y); // x2 = x1 xor y1
+          y.andNot(x); // y2 = y1 andnot x2
+          carryOut = y;
+        }
+      }
+
+      if (carry != null && !carry.isEmpty()) {
+        if (x == null) {
+          // 0 + c = c with nothing carried: the carry becomes this column, so it is not lost when both counters
+          // have run out of bitmaps
+          bitmaps[i] = carry;
+        } else {
+          x.xor(carry); // x3 = x2 xor c1
+          carry.andNot(x); // c2 = c1 andnot x3
+
+          if (carryOut == null) {
+            carryOut = carry;
+          } else {
+            carryOut.or(carry); // c3 = y2 or c2
+          }
+        }
+      }
+
+      carry = carryOut;
+    }
+
+    // Whatever is carried out of the last column has a frequency >= 2^(bitmaps.length)
+    if (carry != null && !carry.isEmpty()) {
+      addToOverflow(carry);
+    }
+
+    // Values that had already overflowed in the other counter remain overflowed in the merged counter
+    if (other.overflow != null) {
+      addToOverflow(other.overflow);
+    }
+
+    return this;
+  }
+
+  /** Unions the given values into the overflow set, adopting the given bitmap if there is no overflow set yet. */
+  private void addToOverflow(RoaringBitmap values) {
+    if (overflow == null) {
+      overflow = values;
+    } else {
+      overflow.or(values);
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java
new file mode 100644
index 000000000000..739fc7bf6aaa
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java
@@ -0,0 +1,65 @@
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.function.IntFunction;
+
+import org.apache.lucene.queries.function.ValueSource;
+
+/**
+ * Accumulates one {@link BitmapFrequencyCounter} per slot, counting the integer value of each collected document.
+ *
+ * <p>Counters are created lazily, so a slot that never collects a document reports an empty list rather than a map.
+ */
+public class BitmapFrequencySlotAcc extends FuncSlotAcc {
+  private BitmapFrequencyCounter[] result;
+  private final int maxFrequency;
+
+  /**
+   * @param values the source of the values to count
+   * @param fcontext the facet context, passed through to the superclass
+   * @param numSlots the initial number of slots
+   * @param maxFrequency the size each slot's counter is created with (its number of bitmaps, not a frequency)
+   */
+  public BitmapFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots, int maxFrequency) {
+    super(values, fcontext, numSlots);
+
+    this.result = new BitmapFrequencyCounter[numSlots];
+    this.maxFrequency = maxFrequency;
+  }
+
+  @Override
+  public void collect(int doc, int slot, IntFunction slotContext) throws IOException {
+    if (result[slot] == null) {
+      result[slot] = new BitmapFrequencyCounter(this.maxFrequency);
+    }
+    result[slot].add(values.intVal(doc));
+  }
+
+  /** Not supported: this accumulator cannot be used for sorting. */
+  @Override
+  public int compare(int slotA, int slotB) {
+    throw new UnsupportedOperationException("bitmap frequency aggregations cannot be used for sorting");
+  }
+
+  /** Returns the slot's serialized counter, or an empty list if the slot never collected a document. */
+  @Override
+  public Object getValue(int slotNum) {
+    if (result[slotNum] != null) {
+      return result[slotNum].serialize();
+    } else {
+      return Collections.emptyList();
+    }
+  }
+
+  @Override
+  public void reset() {
+    Arrays.fill(result, null);
+  }
+
+  @Override
+  public void resize(Resizer resizer) {
+    result = resizer.resize(result, null);
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java b/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java
new file mode 100644
index 000000000000..ba38f64f644e
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java
@@ -0,0 +1,44 @@
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.ByteBuffer;
+
+import org.roaringbitmap.ImmutableBitmapDataProvider;
+import org.roaringbitmap.RoaringBitmap;
+
+/** Conversions between Roaring Bitmaps and their serialized byte form. */
+public final class BitmapUtil {
+  private BitmapUtil() {
+    // static helpers only
+  }
+
+  /**
+   * Serializes the given bitmap into a newly allocated byte array of exactly its serialized size.
+   *
+   * @param bitmap the bitmap to serialize
+   * @return the serialized bytes
+   */
+  public static byte[] bitmapToBytes(ImmutableBitmapDataProvider bitmap) {
+    ByteBuffer buffer = ByteBuffer.allocate(bitmap.serializedSizeInBytes());
+    bitmap.serialize(buffer);
+    return buffer.array();
+  }
+
+  /**
+   * Reads a bitmap back from its serialized bytes.
+   *
+   * @param bytes the serialized bitmap; must not be {@code null}
+   * @return a new bitmap holding the deserialized values
+   * @throws UncheckedIOException if the bytes cannot be deserialized
+   */
+  public static RoaringBitmap bytesToBitmap(byte[] bytes) {
+    try {
+      RoaringBitmap bitmap = new RoaringBitmap();
+      bitmap.deserialize(ByteBuffer.wrap(bytes));
+      return bitmap;
+    } catch (IOException ioe) {
+      throw new UncheckedIOException("Failed to deserialise RoaringBitmap from bytes", ioe);
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java
new file mode 100644
index 000000000000..3b451e46f998
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java
@@ -0,0 +1,106 @@
+package org.apache.solr.search.facet;
+
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.search.FunctionQParser;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.search.ValueSourceParser;
+
+/**
+ * Calculates the frequency-of-frequencies (number of values occurring x times) of ordinal values.
+ *
+ * <p>The response is a map with the following fields:
+ * <ul>
+ *   <li>{@code frequencies}: an array where {@code frequencies[i]} is the number of values with
+ *       {@code frequency = i + 1} (omitted if empty)</li>
+ *   <li>{@code overflow}: the number of values with {@code frequency > frequencies.length}</li>
+ * </ul>
+ *
+ * <p>NOTE: the merged response is not produced yet - the merger currently returns an empty map.
+ *
+ * <p>Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting.
+ */
+public class FrequencyOfFrequenciesAgg extends SimpleAggValueSource {
+  /** Number of bitmaps each {@link BitmapFrequencyCounter} is created with. */
+  private final int size;
+
+  /**
+   * @param vs the source of the values to count
+   * @param size the number of bitmaps each counter is created with
+   */
+  public FrequencyOfFrequenciesAgg(ValueSource vs, int size) {
+    super("bitmapfreqfreq", vs);
+
+    this.size = size;
+  }
+
+  @Override
+  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
+    return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size);
+  }
+
+  @Override
+  public FacetMerger createFacetMerger(Object prototype) {
+    return new Merger(size);
+  }
+
+  /** Parses {@code agg_bitmapfreqfreq(valueSource[, size])}; {@code size} defaults to 16. */
+  public static class Parser extends ValueSourceParser {
+    private static final int DEFAULT_SIZE = 16;
+
+    @Override
+    public ValueSource parse(FunctionQParser fp) throws SyntaxError {
+      ValueSource valueSource = fp.parseValueSource();
+
+      int size = DEFAULT_SIZE;
+      if (fp.hasMoreArguments()) {
+        size = fp.parseInt();
+      }
+
+      // Fail at parse time rather than while collecting: a negative size cannot back a bitmap array, and with
+      // zero bitmaps every value would go straight to overflow.
+      if (size < 1) {
+        throw new SyntaxError("bitmapfreqfreq size must be at least 1, got: " + size);
+      }
+
+      return new FrequencyOfFrequenciesAgg(valueSource, size);
+    }
+  }
+
+  /** Combines serialized counters by adding their frequencies together. */
+  private static class Merger extends FacetMerger {
+    private final int size;
+    private BitmapFrequencyCounter result;
+
+    public Merger(int size) {
+      this.size = size;
+      this.result = new BitmapFrequencyCounter(size);
+    }
+
+    @Override
+    public void merge(Object facetResult, Context mcontext) {
+      // Anything that is not a map (such as the empty list emitted for an empty slot) carries no counts.
+      if (facetResult instanceof SimpleOrderedMap) {
+        BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size);
+        deserialized.deserialize((SimpleOrderedMap) facetResult);
+
+        result = result.merge(deserialized);
+      }
+    }
+
+    @Override
+    public void finish(Context mcontext) {
+      // never called
+    }
+
+    @Override
+    public Object getMergedResult() {
+      SimpleOrderedMap<Object> map = new SimpleOrderedMap<>();
+
+      // TODO: derive the "frequencies" and "overflow" entries described in the class documentation from the
+      // merged counter; until then the merged counts are discarded and an empty map is returned
+
+      return map;
+    }
+  }
+}