forked from apache/lucene-solr
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
438 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
159 changes: 159 additions & 0 deletions
159
solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencies.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.roaringbitmap.RoaringBitmap; | ||
|
||
public class BitmapFrequencies { | ||
private final List<RoaringBitmap> frequencies; | ||
private final Integer maxFrequency; | ||
private RoaringBitmap overflow; | ||
|
||
public BitmapFrequencies() { | ||
this.frequencies = new ArrayList<>(); | ||
this.maxFrequency = null; | ||
} | ||
|
||
public BitmapFrequencies(int maxFrequency) { | ||
this.frequencies = new ArrayList<>(maxFrequency); | ||
this.maxFrequency = maxFrequency; | ||
} | ||
|
||
public BitmapFrequencies(SimpleOrderedMap<Object> serialized) { | ||
this(); | ||
|
||
Iterable<byte[]> serializedFrequencies = (Iterable<byte[]>) serialized.get("frequencies"); | ||
if (serializedFrequencies != null) { | ||
for (byte[] bytes : serializedFrequencies) { | ||
this.frequencies.add(BitmapUtil.bytesToBitmap(bytes)); | ||
} | ||
} | ||
|
||
byte[] overflow = (byte[]) serialized.get("overflow"); | ||
if (overflow != null) { | ||
this.overflow = BitmapUtil.bytesToBitmap(overflow); | ||
} | ||
} | ||
|
||
public List<RoaringBitmap> getFrequencies() { | ||
return this.frequencies; | ||
} | ||
|
||
public RoaringBitmap getOverflow() { | ||
return this.overflow; | ||
} | ||
|
||
public void add(int value) { | ||
for (RoaringBitmap frequency : frequencies) { | ||
if (!frequency.contains(value)) { | ||
frequency.add(value); | ||
return; | ||
} | ||
frequency.remove(value); | ||
} | ||
|
||
if (maxFrequency == null || frequencies.size() < maxFrequency) { | ||
frequencies.add(RoaringBitmap.bitmapOf(value)); | ||
} else { | ||
if (overflow == null) { | ||
overflow = RoaringBitmap.bitmapOf(value); | ||
} else { | ||
overflow.add(value); | ||
} | ||
} | ||
} | ||
|
||
public SimpleOrderedMap<Object> serialize() { | ||
SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(); | ||
|
||
if (!frequencies.isEmpty()) { | ||
List<byte[]> serialized = new ArrayList<>(frequencies.size()); | ||
for (RoaringBitmap bitmap : frequencies) { | ||
bitmap.runOptimize(); | ||
serialized.add(BitmapUtil.bitmapToBytes(bitmap)); | ||
} | ||
map.add("frequencies", serialized); | ||
} | ||
|
||
if (overflow != null) { | ||
map.add("overflow", BitmapUtil.bitmapToBytes(overflow)); | ||
} | ||
|
||
return map; | ||
} | ||
|
||
// Merges (in-place) with frequencies from another sample. The supplied BitmapFrequencies is no longer valid after | ||
// this operation. | ||
public void merge(BitmapFrequencies other) { | ||
int smallest = Math.min(frequencies.size(), other.frequencies.size()); | ||
|
||
RoaringBitmap carried = new RoaringBitmap(); | ||
int f = 0; | ||
while (f < smallest) { | ||
// x(f) is the set of values which occurred with frequency f in this sample | ||
// y(f) is the set of values which occurred with frequency f in the sample to be merged | ||
// carried is the intersection of x(f-1) and y(f-1) | ||
// | ||
// 1) x(f) and y(f) may intersect | ||
// 2) x(f) does not intersect with x(f-1) | ||
// 3) y(f) does not intersect with y(f-1) | ||
// 4) For carried to intersect with x(f), at least one value would have to be in x(f-1), y(f-1) and x(f). | ||
// As per 2), this is impossible. | ||
// 5) For carried to intersect with y(f), at least one value would have to be in x(f-1), y(f-1) and y(f). | ||
// As per 3), this is impossible. | ||
// 6) Therefore, carried does not intersect with either x(f) or y(f). | ||
RoaringBitmap x = frequencies.get(f); | ||
RoaringBitmap y = other.frequencies.get(f); | ||
|
||
// We first merge carried, x, and y. | ||
// Since x and y may intersect, the result may contain some values with frequency at most f+1. | ||
RoaringBitmap merged = carried; | ||
merged.or(x); | ||
merged.or(y); | ||
|
||
// We now calculate the values in the merged set which have frequency f+1, and remove them (to be carried). | ||
carried = x; | ||
carried.and(y); | ||
merged.andNot(carried); | ||
|
||
frequencies.set(f, merged); | ||
f++; | ||
} | ||
|
||
while (f < other.frequencies.size()) { | ||
RoaringBitmap merged = other.frequencies.get(f); | ||
|
||
if (carried != null) { | ||
merged.or(carried); | ||
carried = null; | ||
} | ||
|
||
frequencies.add(merged); | ||
f++; | ||
} | ||
|
||
if (maxFrequency == null || frequencies.size() < maxFrequency) { | ||
if (carried != null) { | ||
frequencies.add(carried); | ||
} | ||
} else { | ||
if (other.overflow != null) { | ||
if (overflow == null) { | ||
overflow = other.overflow; | ||
} else { | ||
overflow.or(other.overflow); | ||
} | ||
} | ||
|
||
if (carried != null) { | ||
if (overflow == null) { | ||
overflow = carried; | ||
} else { | ||
overflow.or(carried); | ||
} | ||
} | ||
} | ||
} | ||
} |
84 changes: 84 additions & 0 deletions
84
solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import org.apache.lucene.queries.function.ValueSource; | ||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.apache.solr.search.FunctionQParser; | ||
import org.apache.solr.search.SyntaxError; | ||
import org.apache.solr.search.ValueSourceParser; | ||
|
||
// Calculates the frequency of ordinal values, up to an optional maximum frequency | ||
// | ||
// The response is a map with the following fields: | ||
// - frequencies: an array where frequencies[i] is a Roaring Bitmap of the ordinal values with frequency i (omitted if | ||
// empty) | ||
// - overflow: a Roaring Bitmap of ordinal values with more than the maximum frequency (omitted if empty) | ||
// | ||
// Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for sorting. | ||
public class BitmapFrequencyAgg extends SimpleAggValueSource { | ||
private final Integer maxFrequency; | ||
|
||
public BitmapFrequencyAgg(ValueSource vs, Integer maxFrequency) { | ||
super("bitmapfreq", vs); | ||
|
||
this.maxFrequency = maxFrequency; | ||
} | ||
|
||
@Override | ||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { | ||
return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, maxFrequency); | ||
} | ||
|
||
@Override | ||
public FacetMerger createFacetMerger(Object prototype) { | ||
if (maxFrequency == null) { | ||
return new BitmapFrequencyFacetMerger(); | ||
} else { | ||
return new BitmapFrequencyFacetMerger(maxFrequency); | ||
} | ||
} | ||
|
||
public static class Parser extends ValueSourceParser { | ||
@Override | ||
public ValueSource parse(FunctionQParser fp) throws SyntaxError { | ||
ValueSource valueSource = fp.parseValueSource(); | ||
|
||
Integer maxFrequency = null; | ||
if (fp.hasMoreArguments()) { | ||
maxFrequency = fp.parseInt(); | ||
} | ||
|
||
return new BitmapFrequencyAgg(valueSource, maxFrequency); | ||
} | ||
} | ||
|
||
private static class BitmapFrequencyFacetMerger extends FacetMerger { | ||
private final BitmapFrequencies result; | ||
|
||
public BitmapFrequencyFacetMerger() { | ||
this.result = new BitmapFrequencies(); | ||
} | ||
|
||
public BitmapFrequencyFacetMerger(int maxFrequency) { | ||
this.result = new BitmapFrequencies(maxFrequency); | ||
} | ||
|
||
@Override | ||
public void merge(Object facetResult, Context mcontext) { | ||
if (facetResult instanceof SimpleOrderedMap) { | ||
BitmapFrequencies deserialized = new BitmapFrequencies((SimpleOrderedMap<Object>) facetResult); | ||
|
||
result.merge(deserialized); | ||
} | ||
} | ||
|
||
@Override | ||
public void finish(Context mcontext) { | ||
// never called | ||
} | ||
|
||
@Override | ||
public Object getMergedResult() { | ||
return result.serialize(); | ||
} | ||
} | ||
} |
56 changes: 56 additions & 0 deletions
56
solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.function.IntFunction; | ||
|
||
import org.apache.lucene.queries.function.ValueSource; | ||
|
||
public class BitmapFrequencySlotAcc extends FuncSlotAcc { | ||
private BitmapFrequencies[] result; | ||
private final Integer maxFrequency; | ||
|
||
public BitmapFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots, Integer maxFrequency) { | ||
super(values, fcontext, numSlots); | ||
|
||
this.result = new BitmapFrequencies[numSlots]; | ||
this.maxFrequency = maxFrequency; | ||
} | ||
|
||
@Override | ||
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException { | ||
if (result[slot] == null) { | ||
if (this.maxFrequency != null) { | ||
result[slot] = new BitmapFrequencies(this.maxFrequency); | ||
} else { | ||
result[slot] = new BitmapFrequencies(); | ||
} | ||
} | ||
result[slot].add(values.intVal(doc)); | ||
} | ||
|
||
@Override | ||
public int compare(int slotA, int slotB) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public Object getValue(int slotNum) { | ||
if (result[slotNum] != null) { | ||
return result[slotNum].serialize(); | ||
} else { | ||
return Collections.emptyList(); | ||
} | ||
} | ||
|
||
@Override | ||
public void reset() { | ||
Arrays.fill(result, null); | ||
} | ||
|
||
@Override | ||
public void resize(Resizer resizer) { | ||
result = resizer.resize(result, null); | ||
} | ||
} |
Oops, something went wrong.