forked from apache/lucene-solr
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
427 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
159 changes: 159 additions & 0 deletions
159
solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencies.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.roaringbitmap.RoaringBitmap; | ||
|
||
public class BitmapFrequencies { | ||
private final List<RoaringBitmap> frequencies; | ||
private final Integer maxFrequency; | ||
private RoaringBitmap overflow; | ||
|
||
public BitmapFrequencies() { | ||
this.frequencies = new ArrayList<>(); | ||
this.maxFrequency = null; | ||
} | ||
|
||
public BitmapFrequencies(int maxFrequency) { | ||
this.frequencies = new ArrayList<>(maxFrequency); | ||
this.maxFrequency = maxFrequency; | ||
} | ||
|
||
public BitmapFrequencies(SimpleOrderedMap<Object> serialized) { | ||
this(); | ||
|
||
Iterable<byte[]> serializedFrequencies = (Iterable<byte[]>) serialized.get("frequencies"); | ||
if (serializedFrequencies != null) { | ||
for (byte[] bytes : serializedFrequencies) { | ||
this.frequencies.add(BitmapUtil.bytesToBitmap(bytes)); | ||
} | ||
} | ||
|
||
byte[] overflow = (byte[]) serialized.get("overflow"); | ||
if (overflow != null) { | ||
this.overflow = BitmapUtil.bytesToBitmap(overflow); | ||
} | ||
} | ||
|
||
public List<RoaringBitmap> getFrequencies() { | ||
return this.frequencies; | ||
} | ||
|
||
public RoaringBitmap getOverflow() { | ||
return this.overflow; | ||
} | ||
|
||
public void add(int value) { | ||
for (RoaringBitmap frequency : frequencies) { | ||
if (!frequency.contains(value)) { | ||
frequency.add(value); | ||
return; | ||
} | ||
frequency.remove(value); | ||
} | ||
|
||
if (maxFrequency == null || frequencies.size() < maxFrequency) { | ||
frequencies.add(RoaringBitmap.bitmapOf(value)); | ||
} else { | ||
if (overflow == null) { | ||
overflow = RoaringBitmap.bitmapOf(value); | ||
} else { | ||
overflow.add(value); | ||
} | ||
} | ||
} | ||
|
||
public SimpleOrderedMap<Object> serialize() { | ||
SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(); | ||
|
||
if (!frequencies.isEmpty()) { | ||
List<byte[]> serialized = new ArrayList<>(frequencies.size()); | ||
for (RoaringBitmap bitmap : frequencies) { | ||
bitmap.runOptimize(); | ||
serialized.add(BitmapUtil.bitmapToBytes(bitmap)); | ||
} | ||
map.add("frequencies", serialized); | ||
} | ||
|
||
if (overflow != null) { | ||
map.add("overflow", BitmapUtil.bitmapToBytes(overflow)); | ||
} | ||
|
||
return map; | ||
} | ||
|
||
// Merges (in-place) with frequencies from another sample. The supplied BitmapFrequencies is no longer valid after | ||
// this operation. | ||
public void merge(BitmapFrequencies other) { | ||
int smallest = Math.min(frequencies.size(), other.frequencies.size()); | ||
|
||
RoaringBitmap carried = new RoaringBitmap(); | ||
int f = 0; | ||
while (f < smallest) { | ||
// x(f) is the set of values which occurred with frequency f in this sample | ||
// y(f) is the set of values which occurred with frequency f in the sample to be merged | ||
// carried is the intersection of x(f-1) and y(f-1) | ||
// | ||
// 1) x(f) and y(f) may intersect | ||
// 2) x(f) does not intersect with x(f-1) | ||
// 3) y(f) does not intersect with y(f-1) | ||
// 4) For carried to intersect with x(f), at least one value would have to be in x(f-1), y(f-1) and x(f). | ||
// As per 2), this is impossible. | ||
// 5) For carried to intersect with y(f), at least one value would have to be in x(f-1), y(f-1) and y(f). | ||
// As per 3), this is impossible. | ||
// 6) Therefore, carried does not intersect with either x(f) or y(f). | ||
RoaringBitmap x = frequencies.get(f); | ||
RoaringBitmap y = other.frequencies.get(f); | ||
|
||
// We first merge carried, x, and y. | ||
// Since x and y may intersect, the result may contain some values with frequency at most f+1. | ||
RoaringBitmap merged = carried; | ||
merged.or(x); | ||
merged.or(y); | ||
|
||
// We now calculate the values in the merged set which have frequency f+1, and remove them (to be carried). | ||
carried = x; | ||
carried.and(y); | ||
merged.andNot(carried); | ||
|
||
frequencies.set(f, merged); | ||
f++; | ||
} | ||
|
||
while (f < other.frequencies.size()) { | ||
RoaringBitmap merged = other.frequencies.get(f); | ||
|
||
if (carried != null) { | ||
merged.or(carried); | ||
carried = null; | ||
} | ||
|
||
frequencies.add(merged); | ||
f++; | ||
} | ||
|
||
if (maxFrequency == null || frequencies.size() < maxFrequency) { | ||
if (carried != null) { | ||
frequencies.add(carried); | ||
} | ||
} else { | ||
if (other.overflow != null) { | ||
if (overflow == null) { | ||
overflow = other.overflow; | ||
} else { | ||
overflow.or(other.overflow); | ||
} | ||
} | ||
|
||
if (carried != null) { | ||
if (overflow == null) { | ||
overflow = carried; | ||
} else { | ||
overflow.or(carried); | ||
} | ||
} | ||
} | ||
} | ||
} |
81 changes: 81 additions & 0 deletions
81
solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencyAgg.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import org.apache.lucene.queries.function.ValueSource; | ||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.apache.solr.search.FunctionQParser; | ||
import org.apache.solr.search.SyntaxError; | ||
import org.apache.solr.search.ValueSourceParser; | ||
|
||
// Calculates frequencies of ordinal values using bitmaps (up to an optional maximum frequency) | ||
// Response: | ||
// - frequencies: an array (omitted if empty) where frequencies[i] is a Roaring Bitmap of the ordinal values which | ||
// occurred with frequency i | ||
// - overflow: a Roaring Bitmap (omitted if empty) of ordinal values with frequency greater than the supplied maximum | ||
public class BitmapFrequencyAgg extends SimpleAggValueSource { | ||
private final Integer maxFrequency; | ||
|
||
public BitmapFrequencyAgg(ValueSource vs, Integer maxFrequency) { | ||
super("bitmapfrequency", vs); | ||
|
||
this.maxFrequency = maxFrequency; | ||
} | ||
|
||
@Override | ||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { | ||
return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, maxFrequency); | ||
} | ||
|
||
@Override | ||
public FacetMerger createFacetMerger(Object prototype) { | ||
if (maxFrequency == null) { | ||
return new BitmapFrequencyFacetMerger(); | ||
} else { | ||
return new BitmapFrequencyFacetMerger(maxFrequency); | ||
} | ||
} | ||
|
||
public static class Parser extends ValueSourceParser { | ||
@Override | ||
public ValueSource parse(FunctionQParser fp) throws SyntaxError { | ||
ValueSource valueSource = fp.parseValueSource(); | ||
|
||
Integer maxFrequency = null; | ||
if (fp.hasMoreArguments()) { | ||
maxFrequency = fp.parseInt(); | ||
} | ||
|
||
return new BitmapFrequencyAgg(valueSource, maxFrequency); | ||
} | ||
} | ||
|
||
private static class BitmapFrequencyFacetMerger extends FacetMerger { | ||
private final BitmapFrequencies result; | ||
|
||
public BitmapFrequencyFacetMerger() { | ||
this.result = new BitmapFrequencies(); | ||
} | ||
|
||
public BitmapFrequencyFacetMerger(int maxFrequency) { | ||
this.result = new BitmapFrequencies(maxFrequency); | ||
} | ||
|
||
@Override | ||
public void merge(Object facetResult, Context mcontext) { | ||
if (facetResult instanceof SimpleOrderedMap) { | ||
BitmapFrequencies deserialized = new BitmapFrequencies((SimpleOrderedMap<Object>) facetResult); | ||
|
||
result.merge(deserialized); | ||
} | ||
} | ||
|
||
@Override | ||
public void finish(Context mcontext) { | ||
// never called | ||
} | ||
|
||
@Override | ||
public Object getMergedResult() { | ||
return result.serialize(); | ||
} | ||
} | ||
} |
56 changes: 56 additions & 0 deletions
56
solr/core/src/java/org/apache/solr/search/facet/BitmapFrequencySlotAcc.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.function.IntFunction; | ||
|
||
import org.apache.lucene.queries.function.ValueSource; | ||
|
||
public class BitmapFrequencySlotAcc extends FuncSlotAcc { | ||
private BitmapFrequencies[] result; | ||
private final Integer maxFrequency; | ||
|
||
public BitmapFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots, Integer maxFrequency) { | ||
super(values, fcontext, numSlots); | ||
|
||
this.result = new BitmapFrequencies[numSlots]; | ||
this.maxFrequency = maxFrequency; | ||
} | ||
|
||
@Override | ||
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException { | ||
if (result[slot] == null) { | ||
if (this.maxFrequency != null) { | ||
result[slot] = new BitmapFrequencies(this.maxFrequency); | ||
} else { | ||
result[slot] = new BitmapFrequencies(); | ||
} | ||
} | ||
result[slot].add(values.intVal(doc)); | ||
} | ||
|
||
@Override | ||
public int compare(int slotA, int slotB) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public Object getValue(int slotNum) { | ||
if (result[slotNum] != null) { | ||
return result[slotNum].serialize(); | ||
} else { | ||
return Collections.emptyList(); | ||
} | ||
} | ||
|
||
@Override | ||
public void reset() { | ||
Arrays.fill(result, null); | ||
} | ||
|
||
@Override | ||
public void resize(Resizer resizer) { | ||
result = resizer.resize(result, null); | ||
} | ||
} |
32 changes: 32 additions & 0 deletions
32
solr/core/src/java/org/apache/solr/search/facet/BitmapUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.io.ByteArrayInputStream; | ||
import java.io.ByteArrayOutputStream; | ||
import java.io.DataInputStream; | ||
import java.io.DataOutputStream; | ||
import java.io.IOException; | ||
|
||
import org.roaringbitmap.ImmutableBitmapDataProvider; | ||
import org.roaringbitmap.RoaringBitmap; | ||
|
||
public class BitmapUtil { | ||
public static byte[] bitmapToBytes(ImmutableBitmapDataProvider bitmap) { | ||
try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(bos)) { | ||
bitmap.serialize(dos); | ||
dos.close(); | ||
return bos.toByteArray(); | ||
} catch (IOException ioe) { | ||
throw new RuntimeException("Failed to serialise RoaringBitmap to bytes", ioe); | ||
} | ||
} | ||
|
||
public static RoaringBitmap bytesToBitmap(byte[] bytes) { | ||
try (ByteArrayInputStream bis = new ByteArrayInputStream(bytes); DataInputStream dis = new DataInputStream(bis)) { | ||
RoaringBitmap bitmap = new RoaringBitmap(); | ||
bitmap.deserialize(dis); | ||
return bitmap; | ||
} catch (IOException ioe) { | ||
throw new RuntimeException("Failed to deserialise RoaringBitmap from bytes", ioe); | ||
} | ||
} | ||
} |
Oops, something went wrong.