forked from apache/lucene-solr
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'termfreqfreq' into bitmapfrequency
- Loading branch information
Showing
12 changed files
with
536 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
43 changes: 43 additions & 0 deletions
43
solr/core/src/java/org/apache/solr/search/facet/TermFrequencyCounter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.util.Comparator; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
|
||
public class TermFrequencyCounter { | ||
private final Map<String, Integer> counters; | ||
private final int limit; | ||
|
||
public TermFrequencyCounter(int limit) { | ||
this.counters = new HashMap<>(); | ||
this.limit = limit; | ||
} | ||
|
||
public Map<String, Integer> getCounters() { | ||
return this.counters; | ||
} | ||
|
||
public void add(String value) { | ||
counters.merge(value, 1, Integer::sum); | ||
} | ||
|
||
public SimpleOrderedMap<Object> serialize() { | ||
SimpleOrderedMap<Object> serialized = new SimpleOrderedMap<>(); | ||
|
||
counters.entrySet() | ||
.stream() | ||
.sorted((l, r) -> r.getValue() - l.getValue()) // sort by value descending | ||
.limit(limit) | ||
.forEach(entry -> serialized.add(entry.getKey(), entry.getValue())); | ||
|
||
return serialized; | ||
} | ||
|
||
public TermFrequencyCounter merge(SimpleOrderedMap<Integer> serialized) { | ||
serialized.forEach((value, freq) -> counters.merge(value, freq, Integer::sum)); | ||
|
||
return this; | ||
} | ||
} |
74 changes: 74 additions & 0 deletions
74
solr/core/src/java/org/apache/solr/search/facet/TermFrequencyOfFrequenciesAgg.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.util.LinkedHashMap; | ||
import java.util.Map; | ||
|
||
import org.apache.lucene.queries.function.ValueSource; | ||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.apache.solr.search.FunctionQParser; | ||
import org.apache.solr.search.SyntaxError; | ||
import org.apache.solr.search.ValueSourceParser; | ||
|
||
public class TermFrequencyOfFrequenciesAgg extends SimpleAggValueSource { | ||
private final int termLimit; | ||
|
||
public TermFrequencyOfFrequenciesAgg(ValueSource vs, int termLimit) { | ||
super("termfreqfreq", vs); | ||
|
||
this.termLimit = termLimit; | ||
} | ||
|
||
@Override | ||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) { | ||
return new TermFrequencySlotAcc(getArg(), fcontext, numSlots, termLimit); | ||
} | ||
|
||
@Override | ||
public FacetMerger createFacetMerger(Object prototype) { | ||
return new Merger(termLimit); | ||
} | ||
|
||
public static class Parser extends ValueSourceParser { | ||
@Override | ||
public ValueSource parse(FunctionQParser fp) throws SyntaxError { | ||
ValueSource vs = fp.parseValueSource(); | ||
|
||
int termLimit = Integer.MAX_VALUE; | ||
if (fp.hasMoreArguments()) { | ||
termLimit = fp.parseInt(); | ||
} | ||
|
||
return new TermFrequencyOfFrequenciesAgg(vs, termLimit); | ||
} | ||
} | ||
|
||
private static class Merger extends FacetMerger { | ||
private final TermFrequencyCounter result; | ||
|
||
public Merger(int termLimit) { | ||
this.result = new TermFrequencyCounter(termLimit); | ||
} | ||
|
||
@Override | ||
public void merge(Object facetResult, Context mcontext) { | ||
if (facetResult instanceof SimpleOrderedMap) { | ||
result.merge((SimpleOrderedMap<Integer>) facetResult); | ||
} | ||
} | ||
|
||
@Override | ||
public void finish(Context mcontext) { | ||
// never called | ||
} | ||
|
||
@Override | ||
public Object getMergedResult() { | ||
Map<Integer, Integer> map = new LinkedHashMap<>(); | ||
|
||
result.getCounters() | ||
.forEach((value, freq) -> map.merge(freq, 1, Integer::sum)); | ||
|
||
return map; | ||
} | ||
} | ||
} |
52 changes: 52 additions & 0 deletions
52
solr/core/src/java/org/apache/solr/search/facet/TermFrequencySlotAcc.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.function.IntFunction; | ||
|
||
import org.apache.lucene.queries.function.ValueSource; | ||
|
||
public class TermFrequencySlotAcc extends FuncSlotAcc { | ||
private TermFrequencyCounter[] result; | ||
private final int termLimit; | ||
|
||
public TermFrequencySlotAcc(ValueSource values, FacetContext fcontext, int numSlots, int termLimit) { | ||
super(values, fcontext, numSlots); | ||
|
||
this.result = new TermFrequencyCounter[numSlots]; | ||
this.termLimit = termLimit; | ||
} | ||
|
||
@Override | ||
public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException { | ||
if (result[slot] == null) { | ||
result[slot] = new TermFrequencyCounter(termLimit); | ||
} | ||
result[slot].add(values.strVal(doc)); | ||
} | ||
|
||
@Override | ||
public int compare(int slotA, int slotB) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public Object getValue(int slotNum) { | ||
if (result[slotNum] != null) { | ||
return result[slotNum].serialize(); | ||
} else { | ||
return Collections.emptyList(); | ||
} | ||
} | ||
|
||
@Override | ||
public void reset() { | ||
Arrays.fill(result, null); | ||
} | ||
|
||
@Override | ||
public void resize(Resizer resizer) { | ||
result = resizer.resize(result, null); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.