Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] Bitmap-based frequency aggregations #37

Open
wants to merge 12 commits into
base: bw_branch_7_7_2
Choose a base branch
from
2 changes: 2 additions & 0 deletions solr/core/ivy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
</configurations>

<dependencies>
<dependency org="org.roaringbitmap" name="RoaringBitmap" rev="0.8.6" conf="compile"/>
<dependency org="org.roaringbitmap" name="shims" rev="0.8.6" conf="compile"/>
<dependency org="commons-codec" name="commons-codec" rev="${/commons-codec/commons-codec}" conf="compile"/>
<dependency org="commons-io" name="commons-io" rev="${/commons-io/commons-io}" conf="compile"/>
<dependency org="org.apache.commons" name="commons-exec" rev="${/org.apache.commons/commons-exec}" conf="compile"/>
Expand Down
18 changes: 17 additions & 1 deletion solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,18 @@
import org.apache.solr.schema.TextField;
import org.apache.solr.search.facet.AggValueSource;
import org.apache.solr.search.facet.AvgAgg;
import org.apache.solr.search.facet.BitmapCollectorAgg;
import org.apache.solr.search.facet.BitmapFrequencyAgg;
import org.apache.solr.search.facet.CountAgg;
import org.apache.solr.search.facet.FrequencyOfFrequenciesAgg;
import org.apache.solr.search.facet.HLLAgg;
import org.apache.solr.search.facet.MinMaxAgg;
import org.apache.solr.search.facet.PercentileAgg;
import org.apache.solr.search.facet.RelatednessAgg;
import org.apache.solr.search.facet.StddevAgg;
import org.apache.solr.search.facet.SumAgg;
import org.apache.solr.search.facet.SumsqAgg;
import org.apache.solr.search.facet.RelatednessAgg;
import org.apache.solr.search.facet.TermFrequencyOfFrequenciesAgg;
import org.apache.solr.search.facet.TopDocsAgg;
import org.apache.solr.search.facet.UniqueAgg;
import org.apache.solr.search.facet.UniqueBlockAgg;
Expand Down Expand Up @@ -1056,6 +1060,18 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {

addParser("agg_topdocs", new TopDocsAgg.Parser());

addParser("agg_bitmapcollector", new BitmapCollectorAgg.Parser());

addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser());

// The "64" function name must use the parser of BitmapFrequencyAgg64, not the one of BitmapFrequencyAgg,
// otherwise both names build the same aggregation. Fully qualified so no additional import is needed.
addParser("agg_bitmapfreq64", new org.apache.solr.search.facet.BitmapFrequencyAgg64.Parser());

addParser("agg_bitmapfreqfreq", new FrequencyOfFrequenciesAgg.Parser());

// NOTE(review): registered with the same parser as "agg_bitmapfreqfreq" - confirm whether a dedicated
// 64-bit variant was intended here.
addParser("agg_bitmapfreqfreq64", new FrequencyOfFrequenciesAgg.Parser());

addParser("agg_termfreqfreq", new TermFrequencyOfFrequenciesAgg.Parser());

addParser("childfield", new ChildFieldValueSourceParser());
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package org.apache.solr.search.facet;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.function.IntFunction;

import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import org.roaringbitmap.buffer.MutableRoaringBitmap;


/**
 * Aggregation that gathers the integer values of a value source into one Roaring bitmap per slot.
 *
 * <p>Every result (per-slot and merged) is a map with a single entry, {@code bitmap}, whose value is the
 * byte array produced by {@code BitmapUtil.bitmapToBytes}. A slot that collected nothing yields an empty
 * byte array.
 */
public class BitmapCollectorAgg extends SimpleAggValueSource {

  /** Name of the single entry in the result map. */
  private static final String KEY = "bitmap";

  /** Builds a {@link BitmapCollectorAgg} from the single value source argument of the function. */
  public static class Parser extends ValueSourceParser {
    @Override
    public ValueSource parse(FunctionQParser fp) throws SyntaxError {
      return new BitmapCollectorAgg(fp.parseValueSource());
    }
  }

  /**
   * @param vs value source whose integer value is collected for every document
   */
  public BitmapCollectorAgg(ValueSource vs) {
    super("bitmapcollector", vs);
  }

  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
    return new Acc(getArg(), fcontext, numSlots);
  }

  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new Merger();
  }

  @Override
  public String description() {
    return "bitmapcollector";
  }

  /** Per-slot accumulator; bitmaps are created lazily on the first document collected into a slot. */
  private static class Acc extends FuncSlotAcc {
    /** One bitmap per slot; {@code null} until the slot receives its first value. */
    MutableRoaringBitmap[] result;

    Acc(ValueSource vs, FacetContext fcontext, int numSlots) {
      super(vs, fcontext, numSlots);
      this.result = new MutableRoaringBitmap[numSlots];
    }

    /** Adds the integer value of {@code doc} to the bitmap of {@code slot}. */
    @Override
    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
      if (result[slot] == null) {
        result[slot] = new MutableRoaringBitmap();
      }
      result[slot].add(values.intVal(doc));
    }

    /** Orders slots by their index only; the collected bitmaps themselves are not compared. */
    @Override
    public int compare(int slotA, int slotB) {
      // Integer.compare instead of subtraction: same sign, no possibility of overflow.
      return Integer.compare(slotA, slotB);
    }

    /**
     * @return a map holding the serialised bitmap of {@code slotNum} under {@code bitmap}, or an empty
     *     byte array when nothing was collected into that slot
     */
    @Override
    public Object getValue(int slotNum) {
      byte[] serialised;
      if (result[slotNum] != null) {
        result[slotNum].runOptimize();
        serialised = BitmapUtil.bitmapToBytes(result[slotNum]);
      } else {
        serialised = new byte[0];
      }
      SimpleOrderedMap<Object> map = new SimpleOrderedMap<>();
      map.add(KEY, serialised);
      return map;
    }

    @Override
    public void reset() {
      Arrays.fill(result, null);
    }

    @Override
    public void resize(Resizer resizer) {
      result = resizer.resize(result, null);
    }
  }

  /** Combines the bitmaps of several results into one by OR-ing them together. */
  public class Merger extends FacetMerger {

    private MutableRoaringBitmap combined = new MutableRoaringBitmap();

    /**
     * ORs the bitmap found under {@code bitmap} in {@code facetResult} into the combined bitmap.
     * Results that are not a {@link SimpleOrderedMap}, that lack the entry, or whose byte array is empty
     * contribute nothing.
     */
    @Override
    public void merge(Object facetResult, Context mcontext) {
      if (!(facetResult instanceof SimpleOrderedMap)) {
        return;
      }
      byte[] bitmapBytes = (byte[]) ((SimpleOrderedMap<?>) facetResult).get(KEY);
      // A missing entry is treated like the empty array written for slots without values,
      // instead of failing with a NullPointerException.
      if (bitmapBytes == null || bitmapBytes.length == 0) {
        return;
      }
      // NOTE(review): assumes the bytes written by BitmapUtil.bitmapToBytes are in the layout that
      // ImmutableRoaringBitmap reads from a ByteBuffer - confirm against BitmapUtil.
      combined.or(new ImmutableRoaringBitmap(ByteBuffer.wrap(bitmapBytes)));
    }

    @Override
    public void finish(Context mcontext) {
      // never called
    }

    /** @return a map holding the serialised combined bitmap under {@code bitmap} */
    @Override
    public Object getMergedResult() {
      combined.runOptimize();
      SimpleOrderedMap<Object> map = new SimpleOrderedMap<>();
      map.add(KEY, BitmapUtil.bitmapToBytes(combined));
      return map;
    }
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package org.apache.solr.search.facet;

import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser;

/**
 * Aggregation that counts how often each ordinal value occurs, backed by Roaring Bitmaps.
 *
 * <p>The response is a map containing:
 * <ul>
 *   <li>{@code bitmaps}: an array of bitmaps; the frequency of a value {@code x} is the sum of {@code 2^i}
 *       over every {@code i} for which {@code bitmaps[i].contains(x)}</li>
 *   <li>{@code overflow}: a map from ordinal value to frequency, for values whose
 *       {@code frequency >= 2^(bitmaps.length)}</li>
 * </ul>
 *
 * <p>There is no coherent notion of magnitude besides the raw count, so this aggregate cannot be used
 * for sorting.
 */
public class BitmapFrequencyAgg extends SimpleAggValueSource {

  /** Size used when the function is called without a second argument. */
  private static final int DEFAULT_SIZE = 16;

  /** Size handed to the slot accumulator and to the merger. */
  private final int size;

  /**
   * Parses the function arguments: a value source, optionally followed by an integer size
   * (16 when omitted).
   */
  public static class Parser extends ValueSourceParser {
    @Override
    public ValueSource parse(FunctionQParser fp) throws SyntaxError {
      ValueSource source = fp.parseValueSource();
      int requestedSize = fp.hasMoreArguments() ? fp.parseInt() : DEFAULT_SIZE;
      return new BitmapFrequencyAgg(source, requestedSize);
    }
  }

  /**
   * @param vs value source supplying the values to count
   * @param size size passed on to the accumulator and the merger
   */
  public BitmapFrequencyAgg(ValueSource vs, int size) {
    super("bitmapfreq", vs);
    this.size = size;
  }

  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
    return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size);
  }

  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new Merger(size);
  }

  /** Folds serialised counters into a single {@link BitmapFrequencyCounter}. */
  private static class Merger extends FacetMerger {
    private final int counterSize;
    private BitmapFrequencyCounter merged;

    public Merger(int size) {
      this.counterSize = size;
      this.merged = new BitmapFrequencyCounter(size);
    }

    /** Deserialises {@code facetResult} and merges it in; anything but a {@link SimpleOrderedMap} is ignored. */
    @Override
    public void merge(Object facetResult, Context mcontext) {
      if (!(facetResult instanceof SimpleOrderedMap)) {
        return;
      }

      @SuppressWarnings("unchecked")
      SimpleOrderedMap<Object> serialized = (SimpleOrderedMap<Object>) facetResult;

      BitmapFrequencyCounter incoming = new BitmapFrequencyCounter(counterSize);
      incoming.deserialize(serialized);

      merged = merged.merge(incoming);
    }

    @Override
    public void finish(Context mcontext) {
      // never called
    }

    /** Normalizes the merged counter, then returns its serialised form. */
    @Override
    public Object getMergedResult() {
      merged.normalize();
      return merged.serialize();
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package org.apache.solr.search.facet;

import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser;

/**
 * Aggregation that counts how often each ordinal value occurs, backed by Roaring Bitmaps. This variant
 * accumulates with {@code BitmapFrequencySlotAcc64} and merges with {@code BitmapFrequencyCounter64}.
 *
 * <p>The response is a map containing:
 * <ul>
 *   <li>{@code bitmaps}: an array of bitmaps; the frequency of a value {@code x} is the sum of {@code 2^i}
 *       over every {@code i} for which {@code bitmaps[i].contains(x)}</li>
 *   <li>{@code overflow}: a map from ordinal value to frequency, for values whose
 *       {@code frequency >= 2^(bitmaps.length)}</li>
 * </ul>
 *
 * <p>There is no coherent notion of magnitude besides the raw count, so this aggregate cannot be used
 * for sorting.
 */
public class BitmapFrequencyAgg64 extends SimpleAggValueSource {

  /** Size used when the function is called without a second argument. */
  private static final int DEFAULT_SIZE = 16;

  /** Size handed to the slot accumulator and to the merger. */
  private final int size;

  /**
   * Parses the function arguments: a value source, optionally followed by an integer size
   * (16 when omitted).
   */
  public static class Parser extends ValueSourceParser {
    @Override
    public ValueSource parse(FunctionQParser fp) throws SyntaxError {
      ValueSource source = fp.parseValueSource();
      int requestedSize = fp.hasMoreArguments() ? fp.parseInt() : DEFAULT_SIZE;
      return new BitmapFrequencyAgg64(source, requestedSize);
    }
  }

  /**
   * @param vs value source supplying the values to count
   * @param size size passed on to the accumulator and the merger
   */
  public BitmapFrequencyAgg64(ValueSource vs, int size) {
    super("bitmapfreq64", vs);
    this.size = size;
  }

  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
    return new BitmapFrequencySlotAcc64(getArg(), fcontext, numSlots, size);
  }

  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new Merger(size);
  }

  /** Folds serialised counters into a single {@link BitmapFrequencyCounter64}. */
  private static class Merger extends FacetMerger {
    private final int counterSize;
    private BitmapFrequencyCounter64 merged;

    public Merger(int size) {
      this.counterSize = size;
      this.merged = new BitmapFrequencyCounter64(size);
    }

    /** Deserialises {@code facetResult} and merges it in; anything but a {@link SimpleOrderedMap} is ignored. */
    @Override
    public void merge(Object facetResult, Context mcontext) {
      if (!(facetResult instanceof SimpleOrderedMap)) {
        return;
      }

      @SuppressWarnings("unchecked")
      SimpleOrderedMap<Object> serialized = (SimpleOrderedMap<Object>) facetResult;

      BitmapFrequencyCounter64 incoming = new BitmapFrequencyCounter64(counterSize);
      incoming.deserialize(serialized);

      merged = merged.merge(incoming);
    }

    @Override
    public void finish(Context mcontext) {
      // never called
    }

    /** Normalizes the merged counter, then returns its serialised form. */
    @Override
    public Object getMergedResult() {
      merged.normalize();
      return merged.serialize();
    }
  }
}
Loading