forked from apache/lucene-solr
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
663 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
81 changes: 81 additions & 0 deletions
81
solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import org.apache.lucene.queries.function.ValueSource; | ||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.apache.solr.search.FunctionQParser; | ||
import org.apache.solr.search.SyntaxError; | ||
import org.apache.solr.search.ValueSourceParser; | ||
|
||
/** | ||
* Calculates the frequency of ordinal values using Roaring Bitmaps. | ||
* | ||
* The response is a map with the following fields: | ||
* - bitmaps: an array of bitmaps, where the frequency of a value x is given by the sum of {@code 2^i} for all values | ||
* of {@code i} where {@code bitmaps[i].contains(x)} | ||
* - overflow: a bitmap of ordinal values with {@code frequency >= 2^(bitmaps.length)} | ||
* | ||
* Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting. | ||
*/ | ||
public class BitmapFrequencyAgg extends SimpleAggValueSource { | ||
private final int size; | ||
|
||
public BitmapFrequencyAgg(ValueSource vs, int size) { | ||
super("bitmapfreq", vs); | ||
|
||
this.size = size; | ||
} | ||
|
||
@Override | ||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { | ||
return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size); | ||
} | ||
|
||
@Override | ||
public FacetMerger createFacetMerger(Object prototype) { | ||
return new Merger(size); | ||
} | ||
|
||
public static class Parser extends ValueSourceParser { | ||
@Override | ||
public ValueSource parse(FunctionQParser fp) throws SyntaxError { | ||
ValueSource valueSource = fp.parseValueSource(); | ||
|
||
int size = 16; | ||
if (fp.hasMoreArguments()) { | ||
size = fp.parseInt(); | ||
} | ||
|
||
return new BitmapFrequencyAgg(valueSource, size); | ||
} | ||
} | ||
|
||
private static class Merger extends FacetMerger { | ||
private final int size; | ||
private BitmapFrequencyCounter result; | ||
|
||
public Merger(int size) { | ||
this.size = size; | ||
this.result = new BitmapFrequencyCounter(size); | ||
} | ||
|
||
@Override | ||
public void merge(Object facetResult, Context mcontext) { | ||
if (facetResult instanceof SimpleOrderedMap) { | ||
BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size); | ||
deserialized.deserialize((SimpleOrderedMap<Object>) facetResult); | ||
|
||
result = result.merge(deserialized); | ||
} | ||
} | ||
|
||
@Override | ||
public void finish(Context mcontext) { | ||
// never called | ||
} | ||
|
||
@Override | ||
public Object getMergedResult() { | ||
return result.serialize(); | ||
} | ||
} | ||
} |
226 changes: 226 additions & 0 deletions
226
solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyCounter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,226 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.roaringbitmap.RoaringBitmap; | ||
|
||
/** | ||
* Counts frequencies of ordinal values using Roaring Bitmaps. | ||
*/ | ||
public class BitmapFrequencyCounter { | ||
private final RoaringBitmap[] bitmaps; | ||
private RoaringBitmap overflow; | ||
|
||
/** | ||
* Constructs a new frequency counter. The maximum countable frequency will be given by {@code (2^size)-1}. | ||
* | ||
* @param size The maximum size of the frequencies list | ||
*/ | ||
public BitmapFrequencyCounter(int size) { | ||
this.bitmaps = new RoaringBitmap[size]; | ||
} | ||
|
||
/** | ||
* An array of bitmaps encoding frequencies of values: the frequency of a value x is given by the sum of {@code 2^i} | ||
* for all values of {@code i} where {@code bitmaps[i].contains(x)}. | ||
* | ||
* @return The encoded frequencies | ||
*/ | ||
public RoaringBitmap[] getBitmaps() { | ||
return this.bitmaps; | ||
} | ||
|
||
/** | ||
* The overflow set of all values with {@code frequency >= 2^(bitmaps.length)}. | ||
* | ||
* @return The overflow set | ||
*/ | ||
public RoaringBitmap getOverflow() { | ||
return this.overflow; | ||
} | ||
|
||
/** | ||
* Adds one occurrence of the given value to the counter. | ||
* | ||
* @param value The value to add | ||
*/ | ||
public void add(int value) { | ||
// This is just binary addition x+1=y - we carry the value till we find an empty column | ||
for (int i = 0; i < bitmaps.length; i++) { | ||
RoaringBitmap bitmap = bitmaps[i]; | ||
if (bitmap == null) { | ||
bitmap = bitmaps[i] = new RoaringBitmap(); | ||
} | ||
|
||
if (!bitmap.contains(value)) { | ||
bitmap.add(value); | ||
return; | ||
} | ||
|
||
bitmap.remove(value); | ||
} | ||
|
||
// If we reach this point, the frequency of this value is >= 2^(bitmaps.length) | ||
|
||
if (overflow == null) { | ||
overflow = new RoaringBitmap(); | ||
} | ||
|
||
overflow.add(value); | ||
} | ||
|
||
/** | ||
* Serializes the counter. | ||
* | ||
* @return The serialized data | ||
*/ | ||
public SimpleOrderedMap<Object> serialize() { | ||
SimpleOrderedMap<Object> serialized = new SimpleOrderedMap<>(); | ||
|
||
byte[][] serializedBitmaps = new byte[bitmaps.length][]; | ||
|
||
int i = 0; | ||
while (i < bitmaps.length) { | ||
RoaringBitmap bitmap = bitmaps[i]; | ||
if (bitmap == null) { | ||
break; | ||
} | ||
|
||
bitmap.runOptimize(); | ||
serializedBitmaps[i] = BitmapUtil.bitmapToBytes(bitmap); | ||
|
||
i++; | ||
} | ||
|
||
if (i > 0) { | ||
serialized.add("bitmaps", serializedBitmaps); | ||
} | ||
|
||
if (overflow != null) { | ||
overflow.runOptimize(); | ||
serialized.add("overflow", BitmapUtil.bitmapToBytes(overflow)); | ||
} | ||
|
||
return serialized; | ||
} | ||
|
||
/** | ||
* Populates the counter from the given serialized data. | ||
* | ||
* The counter must be fresh (with no values previously added), and have the same size as the counter from which the | ||
* serialized data was generated. | ||
* | ||
* @param serialized The serialized data | ||
*/ | ||
public void deserialize(SimpleOrderedMap<Object> serialized) { | ||
byte[][] serializedBitmaps = (byte[][]) serialized.get("bitmaps"); | ||
if (serializedBitmaps != null) { | ||
for (int i = 0; i < bitmaps.length; i++) { | ||
bitmaps[i] = BitmapUtil.bytesToBitmap(serializedBitmaps[i]); | ||
} | ||
} | ||
|
||
byte[] overflow = (byte[]) serialized.get("overflow"); | ||
if (overflow != null) { | ||
this.overflow = BitmapUtil.bytesToBitmap(overflow); | ||
} else { | ||
this.overflow = null; | ||
} | ||
} | ||
|
||
/** | ||
* Merges this counter with another (in-place). | ||
* | ||
* The other counter must have the same size as this counter. After this operation, the returned counter will contain | ||
* the values from both counters with their frequencies added together, and references to either of the original | ||
* counters should be discarded (since either may now be invalid, and one will have been modified and returned). | ||
* | ||
* @param other The counter to merge in | ||
* @return The merged counter | ||
*/ | ||
public BitmapFrequencyCounter merge(BitmapFrequencyCounter other) { | ||
// The algorithm here is a ripple-carry adder in two dimensions, built from half-adders that are adapted from the | ||
// standard (where s is the sum, and c the carried value): | ||
// | ||
// s = x xor y | ||
// c = x and y | ||
// | ||
// to: | ||
// | ||
// s = x xor y | ||
// c = y andnot s | ||
// | ||
// which allows in-place modification of bitmaps (x modified into s, y modified into c). | ||
|
||
RoaringBitmap c; | ||
|
||
int i = 0; | ||
|
||
RoaringBitmap x = bitmaps[i]; | ||
RoaringBitmap y = other.bitmaps[i]; | ||
if (x == null) { | ||
return other; | ||
} else if (y == null) { | ||
return this; | ||
} | ||
|
||
x.xor(y); // x2 = x1 xor y1 | ||
y.andNot(x); // y2 = y1 andnot x2 | ||
|
||
c = y; // c1 = y2 | ||
|
||
i++; | ||
|
||
while (i < bitmaps.length) { | ||
x = bitmaps[i]; | ||
y = other.bitmaps[i]; | ||
if (x == null || y == null) { | ||
break; | ||
} | ||
|
||
x.xor(y); // x2 = x1 xor y1 | ||
y.andNot(x); // y2 = y1 andnot x2 | ||
x.xor(c); // x3 = x2 xor c1 | ||
|
||
c.andNot(x); // c2 = c1 andnot x3 | ||
c.or(y); // c3 = c2 or y2 | ||
|
||
i++; | ||
} | ||
|
||
while (i < bitmaps.length) { | ||
x = bitmaps[i]; | ||
if (x == null) { | ||
break; | ||
} | ||
|
||
x.xor(c); // x2 = x1 xor c1 | ||
c.andNot(x); // c2 = c1 andnot x2 | ||
|
||
i++; | ||
} | ||
|
||
while (i < bitmaps.length) { | ||
x = other.bitmaps[i]; | ||
if (x == null) { | ||
break; | ||
} | ||
|
||
x.xor(c); // x2 = x1 xor c1 | ||
c.andNot(x); // c2 = c1 andnot x2 | ||
|
||
bitmaps[i] = x; | ||
|
||
i++; | ||
} | ||
|
||
if (i == bitmaps.length) { | ||
if (overflow == null) { | ||
overflow = c; | ||
} else { | ||
overflow.or(c); | ||
} | ||
} | ||
|
||
return this; | ||
} | ||
} |
Oops, something went wrong.