Skip to content

Commit

Permalink
bitmap agg to get freq-of-freq of int32 vals
Browse files Browse the repository at this point in the history
  • Loading branch information
mkavanagh committed Sep 2, 2020
1 parent 6b404df commit 9ada171
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import org.apache.solr.search.facet.BitmapCollectorAgg;
import org.apache.solr.search.facet.BitmapFrequencyAgg;
import org.apache.solr.search.facet.CountAgg;
import org.apache.solr.search.facet.FrequencyOfFrequenciesAgg;
import org.apache.solr.search.facet.HLLAgg;
import org.apache.solr.search.facet.MinMaxAgg;
import org.apache.solr.search.facet.PercentileAgg;
Expand Down Expand Up @@ -1062,6 +1063,8 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {

addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser());

addParser("agg_bitmapfreqfreq", new FrequencyOfFrequenciesAgg.Parser());

addParser("childfield", new ChildFieldValueSourceParser());
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.apache.solr.search.facet;

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser;

/**
 * Aggregate that reports the frequency-of-frequencies of ordinal values: for every frequency x
 * (the number of times a value occurred), the number of values that occurred x times.
 *
 * <p>The merged response is a map whose keys are frequencies (x) and whose values are the
 * frequency-of-frequencies (the number of values which occurred x times).
 *
 * <p>Since there is no coherent definition of magnitude other than the raw count, this aggregate
 * cannot be used for sorting.
 */
public class FrequencyOfFrequenciesAgg extends SimpleAggValueSource {
  /** Size applied when the function is invoked without an explicit size argument. */
  private static final int DEFAULT_SIZE = 8;

  /** Size handed to the slot accumulator and to the merger's frequency counters. */
  private final int size;

  /**
   * @param vs the value source whose values are aggregated
   * @param size size forwarded to {@link BitmapFrequencySlotAcc} and {@link BitmapFrequencyCounter};
   *     it is unboxed here, so a {@code null} raises a {@link NullPointerException}
   */
  public FrequencyOfFrequenciesAgg(ValueSource vs, Integer size) {
    super("bitmapfreqfreq", vs);
    this.size = size;
  }

  /** Creates the per-slot accumulator over this aggregate's argument, using the configured size. */
  @Override
  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
    return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size);
  }

  /** Creates a merger that combines serialized counters using the configured size. */
  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new Merger(size);
  }

  /**
   * Parses {@code (valueSource[, size])}: a mandatory value source followed by an optional integer
   * size, which falls back to {@value FrequencyOfFrequenciesAgg#DEFAULT_SIZE} when omitted.
   */
  public static class Parser extends ValueSourceParser {
    @Override
    public ValueSource parse(FunctionQParser fp) throws SyntaxError {
      // The value source must be consumed first; the size, if any, is the argument after it.
      ValueSource source = fp.parseValueSource();
      int requestedSize = fp.hasMoreArguments() ? fp.parseInt() : DEFAULT_SIZE;
      return new FrequencyOfFrequenciesAgg(source, requestedSize);
    }
  }

  /**
   * Folds serialized {@link BitmapFrequencyCounter} results into a single counter and renders the
   * combined counter as a frequency to frequency-of-frequencies map.
   */
  private static class Merger extends FacetMerger {
    private final int size;
    // Running combination of every counter merged so far; replaced on each merge() call.
    private BitmapFrequencyCounter result;

    public Merger(int size) {
      this.size = size;
      this.result = new BitmapFrequencyCounter(size);
    }

    /**
     * Deserializes {@code facetResult} into a fresh counter and merges it into the running
     * result. Anything that is not a {@link SimpleOrderedMap} is silently ignored.
     */
    @Override
    public void merge(Object facetResult, Context mcontext) {
      if (!(facetResult instanceof SimpleOrderedMap)) {
        return;
      }

      BitmapFrequencyCounter incoming = new BitmapFrequencyCounter(size);
      // Only the raw type can be verified at runtime; the element type is taken on trust.
      @SuppressWarnings("unchecked")
      SimpleOrderedMap<Object> serialized = (SimpleOrderedMap<Object>) facetResult;
      incoming.deserialize(serialized);

      result = result.merge(incoming);
    }

    @Override
    public void finish(Context mcontext) {
      // never called
    }

    /**
     * Builds the response map: keys are frequencies, values are the number of values that
     * occurred with that frequency. Entries keep insertion order (decoded frequencies in
     * ascending index order, followed by overflow entries in the overflow's iteration order).
     */
    @Override
    public Object getMergedResult() {
      result.normalize();

      Map<Integer, Integer> freqOfFreq = new LinkedHashMap<>();

      // The array index is the frequency; the element is how many values had that frequency.
      int[] decoded = result.decode();
      for (int frequency = 0; frequency < decoded.length; frequency++) {
        int valueCount = decoded[frequency];
        if (valueCount <= 0) {
          continue;
        }
        freqOfFreq.put(frequency, valueCount);
      }

      // Each overflow entry adds one to the bucket keyed by that entry's frequency.
      result.getOverflow()
          .forEach((overflowValue, overflowFreq) -> freqOfFreq.merge(overflowFreq, 1, Integer::sum));

      return freqOfFreq;
    }
  }
}

0 comments on commit 9ada171

Please sign in to comment.