Skip to content

Commit

Permalink
Merge branch 'termfreqfreq' into bitmapfrequency
Browse files Browse the repository at this point in the history
  • Loading branch information
mkavanagh committed Oct 5, 2020
2 parents f682d1a + 62849e9 commit 8aae4dd
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 38 deletions.
34 changes: 33 additions & 1 deletion solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,39 @@
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.queries.function.valuesource.*;
import org.apache.lucene.queries.function.valuesource.ConstNumberSource;
import org.apache.lucene.queries.function.valuesource.ConstValueSource;
import org.apache.lucene.queries.function.valuesource.DefFunction;
import org.apache.lucene.queries.function.valuesource.DivFloatFunction;
import org.apache.lucene.queries.function.valuesource.DocFreqValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleConstValueSource;
import org.apache.lucene.queries.function.valuesource.DualFloatFunction;
import org.apache.lucene.queries.function.valuesource.IDFValueSource;
import org.apache.lucene.queries.function.valuesource.IfFunction;
import org.apache.lucene.queries.function.valuesource.JoinDocFreqValueSource;
import org.apache.lucene.queries.function.valuesource.LinearFloatFunction;
import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
import org.apache.lucene.queries.function.valuesource.MaxDocValueSource;
import org.apache.lucene.queries.function.valuesource.MaxFloatFunction;
import org.apache.lucene.queries.function.valuesource.MinFloatFunction;
import org.apache.lucene.queries.function.valuesource.MultiBoolFunction;
import org.apache.lucene.queries.function.valuesource.MultiValueSource;
import org.apache.lucene.queries.function.valuesource.NormValueSource;
import org.apache.lucene.queries.function.valuesource.NumDocsValueSource;
import org.apache.lucene.queries.function.valuesource.ProductFloatFunction;
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
import org.apache.lucene.queries.function.valuesource.RangeMapFloatFunction;
import org.apache.lucene.queries.function.valuesource.ReciprocalFloatFunction;
import org.apache.lucene.queries.function.valuesource.ScaleFloatFunction;
import org.apache.lucene.queries.function.valuesource.SimpleBoolFunction;
import org.apache.lucene.queries.function.valuesource.SimpleFloatFunction;
import org.apache.lucene.queries.function.valuesource.SingleFunction;
import org.apache.lucene.queries.function.valuesource.SumFloatFunction;
import org.apache.lucene.queries.function.valuesource.SumTotalTermFreqValueSource;
import org.apache.lucene.queries.function.valuesource.TFValueSource;
import org.apache.lucene.queries.function.valuesource.TermFreqValueSource;
import org.apache.lucene.queries.function.valuesource.TotalTermFreqValueSource;
import org.apache.lucene.queries.function.valuesource.VectorValueSource;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.search.IndexSearcher;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,41 +1,74 @@
package org.apache.solr.search.facet;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

public class TermFrequencyCounter {
private final Map<String, Integer> counters;
private final Map<String, Integer> counts;
private boolean overflow;

public TermFrequencyCounter() {
this.counters = new HashMap<>();
this.counts = new HashMap<>();
}

public Map<String, Integer> getCounters() {
return this.counters;
public Map<String, Integer> getCounts() {
return this.counts;
}

public void add(String value) {
counters.merge(value, 1, Integer::sum);
counts.merge(value, 1, Integer::sum);
}

public Map<String, Integer> serialize(int limit) {
if (limit < Integer.MAX_VALUE && limit < counters.size()) {
return counters.entrySet()
.stream()
.sorted((l, r) -> r.getValue() - l.getValue()) // sort by value descending
.limit(limit)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
public SimpleOrderedMap<Object> serialize(int limit) {
SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();

if (limit < counts.size()) {
result.add("counts", getTopCounts(counts, limit));
result.add("overflow", Boolean.TRUE);
} else {
return counters;
result.add("counts", counts);
result.add("overflow", Boolean.FALSE);
}

return result;
}

/**
 * Selects the entries of {@code counters} with the largest counts.
 *
 * <p>Entries are sorted by count, highest first, and the first {@code limit} of them are
 * collected into a new map. The result is built with {@link Collectors#toMap}, which makes no
 * promise about iteration order, so callers should rely only on which entries are present.
 *
 * @param counters term-to-count map to select from; it is read but not modified
 * @param limit maximum number of entries to keep
 * @return a new map containing at most {@code limit} of the highest-count entries
 */
private Map<String, Integer> getTopCounts(Map<String, Integer> counters, int limit) {
    return counters.entrySet()
        .stream()
        // Integer.compare rather than "r - l": subtraction can overflow (for example when a
        // summed count has wrapped negative), which yields a wrong order and can violate the
        // comparator contract that the sort relies on.
        .sorted((l, r) -> Integer.compare(r.getValue(), l.getValue())) // sort by value descending
        .limit(limit)
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}

public TermFrequencyCounter merge(Map<String, Integer> serialized) {
serialized.forEach((value, freq) -> counters.merge(value, freq, Integer::sum));
/**
 * Folds a serialized counter into this one.
 *
 * <p>The {@code "counts"} entry, when present, is read as a term-to-count map and each count is
 * added to the count already held for that term. The {@code "overflow"} entry, when present and
 * true, marks this counter as overflowed; an absent or false value leaves the flag unchanged.
 *
 * @param serialized structure carrying optional {@code "counts"} and {@code "overflow"} entries
 * @return this counter, to allow chaining
 */
public TermFrequencyCounter merge(NamedList<Object> serialized) {
    @SuppressWarnings("unchecked") // the "counts" entry is expected to be a term-to-count map
    final Map<String, Integer> incomingCounts = (Map<String, Integer>) serialized.get("counts");
    if (incomingCounts != null) {
        for (Map.Entry<String, Integer> entry : incomingCounts.entrySet()) {
            this.counts.merge(entry.getKey(), entry.getValue(), Integer::sum);
        }
    }

    final Boolean incomingOverflow = (Boolean) serialized.get("overflow");
    if (Boolean.TRUE.equals(incomingOverflow)) {
        // The flag only ever goes from false to true; null or false changes nothing.
        this.overflow = true;
    }

    return this;
}

/**
 * Builds a "frequency of frequencies" view of the current counts.
 *
 * <p>The returned structure has two entries: {@code "frequencies"}, a map from a count to the
 * number of terms that have exactly that count, and {@code "overflow"}, the current value of
 * this counter's overflow flag.
 *
 * @return a new {@code SimpleOrderedMap} holding the {@code "frequencies"} and
 *     {@code "overflow"} entries, in that order
 */
public SimpleOrderedMap<Object> toFrequencyOfFrequencies() {
    // Only the counts matter here; the terms themselves are not part of the output.
    Map<Integer, Integer> termsPerCount = new LinkedHashMap<>();
    for (Integer count : counts.values()) {
        termsPerCount.merge(count, 1, Integer::sum);
    }

    SimpleOrderedMap<Object> response = new SimpleOrderedMap<>();
    response.add("frequencies", termsPerCount);
    response.add("overflow", overflow);
    return response;
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
package org.apache.solr.search.facet;

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.FunctionQParser;
Expand All @@ -25,7 +22,7 @@ public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) {

@Override
public FacetMerger createFacetMerger(Object prototype) {
return new Merger(termLimit);
return new Merger();
}

public static class Parser extends ValueSourceParser {
Expand All @@ -45,14 +42,14 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError {
private static class Merger extends FacetMerger {
private final TermFrequencyCounter result;

public Merger(int termLimit) {
public Merger() {
this.result = new TermFrequencyCounter();
}

@Override
public void merge(Object facetResult, Context mcontext) {
if (facetResult instanceof Map) {
result.merge((Map<String, Integer>) facetResult);
if (facetResult instanceof SimpleOrderedMap) {
result.merge((SimpleOrderedMap<Object>) facetResult);
}
}

Expand All @@ -63,12 +60,7 @@ public void finish(Context mcontext) {

@Override
public Object getMergedResult() {
Map<Integer, Integer> map = new LinkedHashMap<>();

result.getCounters()
.forEach((value, freq) -> map.merge(freq, 1, Integer::sum));

return map;
return result.toFrequencyOfFrequencies();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.function.IntFunction;

import org.apache.lucene.queries.function.ValueSource;
import org.apache.solr.common.util.SimpleOrderedMap;

public class TermFrequencySlotAcc extends FuncSlotAcc {
private TermFrequencyCounter[] result;
Expand Down Expand Up @@ -33,10 +33,18 @@ public int compare(int slotA, int slotB) {

@Override
public Object getValue(int slotNum) {
if (result[slotNum] != null) {
return result[slotNum].serialize(termLimit);
if (fcontext.isShard()) {
if (result[slotNum] != null) {
return result[slotNum].serialize(termLimit);
} else {
return new SimpleOrderedMap<>();
}
} else {
return Collections.emptyList();
if (result[slotNum] != null) {
return result[slotNum].toFrequencyOfFrequencies();
} else {
return new SimpleOrderedMap<>();
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import com.carrotsearch.randomizedtesting.annotations.Seed;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.SimpleOrderedMap;
Expand Down Expand Up @@ -104,7 +102,7 @@ private static void assertCount(TermFrequencyCounter counter, String value, int
assertEquals(
"value " + value + " should have count " + count,
count,
(int) counter.getCounters().getOrDefault(value, 0)
(int) counter.getCounts().getOrDefault(value, 0)
);
}

Expand All @@ -116,7 +114,7 @@ private static TermFrequencyCounter serdeser(TermFrequencyCounter counter, int l

InputStream in = new ByteArrayInputStream(out.toByteArray());
counter = new TermFrequencyCounter();
counter.merge((Map<String, Integer>) codec.unmarshal(in));
counter.merge((SimpleOrderedMap<Object>) codec.unmarshal(in));

return counter;
}
Expand All @@ -132,7 +130,7 @@ private static TermFrequencyCounter merge(
codec.marshal(toMerge.serialize(limit), out);

InputStream in = new ByteArrayInputStream(out.toByteArray());
counter.merge((Map<String, Integer>) codec.unmarshal(in));
counter.merge((SimpleOrderedMap<Object>) codec.unmarshal(in));

return counter;
}
Expand Down

0 comments on commit 8aae4dd

Please sign in to comment.