Skip to content

Commit

Permalink
bitmap agg to get freq of int32 vals
Browse files Browse the repository at this point in the history
  • Loading branch information
mkavanagh committed Sep 2, 2020
1 parent 74a66f9 commit 6b404df
Show file tree
Hide file tree
Showing 7 changed files with 1,019 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,15 @@
import org.apache.solr.search.facet.AggValueSource;
import org.apache.solr.search.facet.AvgAgg;
import org.apache.solr.search.facet.BitmapCollectorAgg;
import org.apache.solr.search.facet.BitmapFrequencyAgg;
import org.apache.solr.search.facet.CountAgg;
import org.apache.solr.search.facet.HLLAgg;
import org.apache.solr.search.facet.MinMaxAgg;
import org.apache.solr.search.facet.PercentileAgg;
import org.apache.solr.search.facet.RelatednessAgg;
import org.apache.solr.search.facet.StddevAgg;
import org.apache.solr.search.facet.SumAgg;
import org.apache.solr.search.facet.SumsqAgg;
import org.apache.solr.search.facet.RelatednessAgg;
import org.apache.solr.search.facet.TopDocsAgg;
import org.apache.solr.search.facet.UniqueAgg;
import org.apache.solr.search.facet.UniqueBlockAgg;
Expand Down Expand Up @@ -1059,6 +1060,8 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {

addParser("agg_bitmapcollector", new BitmapCollectorAgg.Parser());

addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser());

addParser("childfield", new ChildFieldValueSourceParser());
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package org.apache.solr.search.facet;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
Expand Down Expand Up @@ -73,7 +71,7 @@ public Object getValue(int slotNum) {
byte[] serialised;
if (result[slotNum] != null) {
result[slotNum].runOptimize();
serialised = bitmapToBytes(result[slotNum]);
serialised = BitmapUtil.bitmapToBytes(result[slotNum]);
} else {
serialised = new byte[0];
}
Expand Down Expand Up @@ -116,20 +114,9 @@ public void finish(Context mcontext) {
// Returns the merged result as a map holding the serialised form of the combined bitmap under KEY.
public Object getMergedResult() {
// runOptimize() is invoked on the combined bitmap before it is serialised
combined.runOptimize();
SimpleOrderedMap map = new SimpleOrderedMap();
// NOTE(review): the two add() calls below look like the removed and the added side of one diff hunk
// rendered together; presumably only the BitmapUtil variant is meant to remain - confirm against the real file.
map.add(KEY, bitmapToBytes(combined));
map.add(KEY, BitmapUtil.bitmapToBytes(combined));
return map;
}
}

/**
 * Serialises the given bitmap into a freshly allocated byte array.
 *
 * @param bitmap the bitmap to serialise, written out through its own {@code serialize} method
 * @return the bytes that the bitmap wrote
 * @throws RuntimeException wrapping the underlying {@link IOException} if writing or closing the stream fails
 */
private static byte[] bitmapToBytes(MutableRoaringBitmap bitmap) {
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  // try-with-resources closes the stream on the failure path as well as on success
  try (DataOutputStream dos = new DataOutputStream(bos)) {
    bitmap.serialize(dos);
  } catch (IOException ioe) {
    throw new RuntimeException("Failed to serialise RoaringBitmap to bytes", ioe);
  }
  // read the buffer only once the stream has been closed, matching the original close-then-read order
  return bos.toByteArray();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package org.apache.solr.search.facet;

import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser;

/**
 * Calculates the frequency of ordinal values using Roaring Bitmaps.
 *
 * <p>The response is a map with the following fields:
 * <ul>
 *   <li>{@code bitmaps}: an array of bitmaps, where the frequency of a value {@code x} is given by the sum of
 *       {@code 2^i} for all values of {@code i} where {@code bitmaps[i].contains(x)}</li>
 *   <li>{@code overflow}: a map of ordinal values to frequencies, for values with
 *       {@code frequency >= 2^(bitmaps.length)}</li>
 * </ul>
 *
 * <p>Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting.
 */
public class BitmapFrequencyAgg extends SimpleAggValueSource {
  /** Size used by {@link Parser} when the function call does not supply one. */
  private static final int DEFAULT_SIZE = 16;

  /** Size handed unchanged to the slot accumulator and to the merger's counters. */
  private final int size;

  /**
   * Creates the aggregate under the name {@code "bitmapfreq"}.
   *
   * @param vs   the value source whose values are counted
   * @param size the size passed on to {@link BitmapFrequencySlotAcc} and {@link BitmapFrequencyCounter};
   *             presumably the number of bitmaps described in the class documentation - TODO confirm
   */
  public BitmapFrequencyAgg(ValueSource vs, int size) {
    super("bitmapfreq", vs);

    this.size = size;
  }

  /**
   * Creates the accumulator for this aggregate.
   *
   * @param fcontext the facet context, forwarded to the accumulator
   * @param numDocs  not used by this aggregate
   * @param numSlots the number of slots, forwarded to the accumulator
   * @return a new {@link BitmapFrequencySlotAcc} over this aggregate's value source
   */
  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
    return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size);
  }

  /**
   * Creates the merger for this aggregate.
   *
   * @param prototype ignored; the merger is configured from this aggregate's size only
   * @return a new merger that starts from an empty counter
   */
  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new Merger(size);
  }

  /**
   * Parses the arguments of the function: a value source, optionally followed by an integer size.
   * When the size is omitted, {@code 16} is used.
   */
  public static class Parser extends ValueSourceParser {
    /**
     * @param fp the function parser positioned at the first argument
     * @return a {@link BitmapFrequencyAgg} over the parsed value source
     * @throws SyntaxError if an argument cannot be parsed, or if the supplied size is negative
     */
    @Override
    public ValueSource parse(FunctionQParser fp) throws SyntaxError {
      ValueSource valueSource = fp.parseValueSource();

      int size = DEFAULT_SIZE;
      if (fp.hasMoreArguments()) {
        size = fp.parseInt();
      }

      // Reject a negative size while parsing so the caller gets a syntax error instead of a failure
      // somewhere further down.
      // NOTE(review): no upper bound is enforced here - confirm what BitmapFrequencyCounter supports.
      if (size < 0) {
        throw new SyntaxError("bitmapfreq: size must not be negative, but was " + size);
      }

      return new BitmapFrequencyAgg(valueSource, size);
    }
  }

  /** Combines the facet results passed to {@link #merge} into a single {@link BitmapFrequencyCounter}. */
  private static class Merger extends FacetMerger {
    private final int size;
    private BitmapFrequencyCounter result;

    public Merger(int size) {
      this.size = size;
      this.result = new BitmapFrequencyCounter(size);
    }

    /**
     * Deserialises one facet result and merges it into the running counter.
     * Results that are not a {@link SimpleOrderedMap} are skipped.
     */
    @Override
    public void merge(Object facetResult, Context mcontext) {
      if (!(facetResult instanceof SimpleOrderedMap)) {
        return;
      }

      // The instanceof test above cannot check the type argument; the map is read as a generic
      // SimpleOrderedMap<Object>, exactly as before.
      @SuppressWarnings("unchecked")
      SimpleOrderedMap<Object> serialized = (SimpleOrderedMap<Object>) facetResult;

      BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size);
      deserialized.deserialize(serialized);

      // merge() hands back the counter to continue with, so keep its return value
      result = result.merge(deserialized);
    }

    @Override
    public void finish(Context mcontext) {
      // never called
    }

    /** Normalizes the merged counter and returns its serialised form. */
    @Override
    public Object getMergedResult() {
      result.normalize();
      return result.serialize();
    }
  }
}
Loading

0 comments on commit 6b404df

Please sign in to comment.