From 9ada171565e35f457280b85106183b789750e081 Mon Sep 17 00:00:00 2001 From: Matthew Kavanagh Date: Wed, 2 Sep 2020 13:26:14 +0100 Subject: [PATCH] bitmap agg to get freq-of-freq of int32 vals --- .../apache/solr/search/ValueSourceParser.java | 3 + .../facet/FrequencyOfFrequenciesAgg.java | 97 +++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index d30e254d0c99..e122eced3b59 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -59,6 +59,7 @@ import org.apache.solr.search.facet.BitmapCollectorAgg; import org.apache.solr.search.facet.BitmapFrequencyAgg; import org.apache.solr.search.facet.CountAgg; +import org.apache.solr.search.facet.FrequencyOfFrequenciesAgg; import org.apache.solr.search.facet.HLLAgg; import org.apache.solr.search.facet.MinMaxAgg; import org.apache.solr.search.facet.PercentileAgg; @@ -1062,6 +1063,8 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError { addParser("agg_bitmapfreq", new BitmapFrequencyAgg.Parser()); + addParser("agg_bitmapfreqfreq", new FrequencyOfFrequenciesAgg.Parser()); + addParser("childfield", new ChildFieldValueSourceParser()); } diff --git a/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java new file mode 100644 index 000000000000..09d9ffc570ff --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/FrequencyOfFrequenciesAgg.java @@ -0,0 +1,97 @@ +package org.apache.solr.search.facet; + +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.solr.common.util.SimpleOrderedMap; 
+import org.apache.solr.search.FunctionQParser;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.search.ValueSourceParser;
+
+/**
+ * Calculates the frequency-of-frequencies (number of values occurring x times) of ordinal values.
+ *
+ * <p>The response is a map where the keys are frequencies (x = number of times a value occurred), and the
+ * values are the frequency-of-frequencies (number of values which occurred x times).
+ *
+ * <p>Lacking a coherent definition of magnitude other than the raw count, this aggregate cannot be used for
+ * sorting.
+ */
+public class FrequencyOfFrequenciesAgg extends SimpleAggValueSource {
+  /** Sizing parameter forwarded unchanged to the slot accumulator and to the merger's counters. */
+  private final int size;
+
+  /**
+   * Creates the aggregation under the name {@code "bitmapfreqfreq"}.
+   *
+   * @param vs the value source whose values are counted
+   * @param size sizing parameter handed to {@code BitmapFrequencySlotAcc} and {@code BitmapFrequencyCounter};
+   *     it is unboxed on assignment, so passing {@code null} throws {@link NullPointerException}
+   */
+  public FrequencyOfFrequenciesAgg(ValueSource vs, Integer size) {
+    super("bitmapfreqfreq", vs);
+
+    this.size = size;
+  }
+
+  /**
+   * Builds the per-slot accumulator for this aggregation.
+   *
+   * @param fcontext the facet context the accumulator runs in
+   * @param numDocs not used by this implementation
+   * @param numSlots number of slots passed to the accumulator
+   * @return a new {@code BitmapFrequencySlotAcc} over this aggregation's argument
+   */
+  @Override
+  public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {
+    return new BitmapFrequencySlotAcc(getArg(), fcontext, numSlots, size);
+  }
+
+  /**
+   * Builds the merger that combines facet results for this aggregation.
+   *
+   * @param prototype not used by this implementation
+   * @return a new {@link Merger} configured with this aggregation's {@code size}
+   */
+  @Override
+  public FacetMerger createFacetMerger(Object prototype) {
+    return new Merger(size);
+  }
+
+  /** Parses {@code (valueSource[, size])} into a {@link FrequencyOfFrequenciesAgg}. */
+  public static class Parser extends ValueSourceParser {
+    /**
+     * Reads the value source and, when a further argument is present, an integer {@code size}.
+     *
+     * @param fp the function parser positioned at this function's arguments
+     * @return the configured aggregation; {@code size} defaults to 8 when it is not supplied
+     * @throws SyntaxError if the arguments cannot be parsed
+     */
+    @Override
+    public ValueSource parse(FunctionQParser fp) throws SyntaxError {
+      ValueSource valueSource = fp.parseValueSource();
+
+      int size = 8;
+      if (fp.hasMoreArguments()) {
+        size = fp.parseInt();
+      }
+
+      return new FrequencyOfFrequenciesAgg(valueSource, size);
+    }
+  }
+
+  /** Accumulates facet results into a single {@code BitmapFrequencyCounter} and renders the final map. */
+  private static class Merger extends FacetMerger {
+    private final int size;
+    /** Running merged state; replaced by the return value of each {@code merge} call on the counter. */
+    private BitmapFrequencyCounter result;
+
+    /**
+     * @param size sizing parameter used for the running counter and for every deserialized counter
+     */
+    public Merger(int size) {
+      this.size = size;
+      this.result = new BitmapFrequencyCounter(size);
+    }
+
+    /**
+     * Folds one facet result into the running counter.
+     *
+     * <p>Only {@code SimpleOrderedMap} results are merged; any other type (including {@code null}) is
+     * silently ignored.
+     *
+     * @param facetResult the result to merge
+     * @param mcontext not used by this implementation
+     */
+    @Override
+    public void merge(Object facetResult, Context mcontext) {
+      if (facetResult instanceof SimpleOrderedMap) {
+        BitmapFrequencyCounter deserialized = new BitmapFrequencyCounter(size);
+        // NOTE(review): the cast is left raw because the parameter type of deserialize() is not visible here.
+        deserialized.deserialize((SimpleOrderedMap) facetResult);
+
+        result = result.merge(deserialized);
+      }
+    }
+
+    @Override
+    public void finish(Context mcontext) {
+      // never called
+    }
+
+    /**
+     * Renders the merged counter as a map.
+     *
+     * <p>Every index of the decoded array holding a positive value becomes an entry {@code index -> value}.
+     * Each overflow entry then adds 1 to the entry keyed by that overflow entry's frequency. Presumably the
+     * decoded index is a frequency and the element is the number of values with that frequency; confirm
+     * against {@code BitmapFrequencyCounter}.
+     *
+     * @return an insertion-ordered {@code Map<Integer, Integer>}
+     */
+    @Override
+    public Object getMergedResult() {
+      // Explicit type arguments are required: against a raw Map the Integer::sum method reference below has
+      // no (Integer, Integer) target type and the merge() call does not compile.
+      Map<Integer, Integer> map = new LinkedHashMap<>();
+
+      result.normalize();
+
+      int[] lowFrequencies = result.decode();
+      for (int i = 0; i < lowFrequencies.length; i++) {
+        int value = lowFrequencies[i];
+        if (value > 0) {
+          map.put(i, value);
+        }
+      }
+
+      // Each overflow entry is one distinct value, so it contributes exactly 1 to its frequency's bucket.
+      result.getOverflow()
+          .forEach((value, freq) -> map.merge(freq, 1, Integer::sum));
+
+      return map;
+    }
+  }
+}