forked from apache/lucene-solr
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'termfreqfreq' into bitmapfrequency
- Loading branch information
Showing
5 changed files
with
101 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
63 changes: 48 additions & 15 deletions
63
solr/core/src/java/org/apache/solr/search/facet/TermFrequencyCounter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,74 @@ | ||
package org.apache.solr.search.facet; | ||
|
||
import java.util.HashMap; | ||
import java.util.LinkedHashMap; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
|
||
import org.apache.solr.common.util.NamedList; | ||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
|
||
public class TermFrequencyCounter { | ||
private final Map<String, Integer> counters; | ||
private final Map<String, Integer> counts; | ||
private boolean overflow; | ||
|
||
public TermFrequencyCounter() { | ||
this.counters = new HashMap<>(); | ||
this.counts = new HashMap<>(); | ||
} | ||
|
||
public Map<String, Integer> getCounters() { | ||
return this.counters; | ||
public Map<String, Integer> getCounts() { | ||
return this.counts; | ||
} | ||
|
||
public void add(String value) { | ||
counters.merge(value, 1, Integer::sum); | ||
counts.merge(value, 1, Integer::sum); | ||
} | ||
|
||
public Map<String, Integer> serialize(int limit) { | ||
if (limit < Integer.MAX_VALUE && limit < counters.size()) { | ||
return counters.entrySet() | ||
.stream() | ||
.sorted((l, r) -> r.getValue() - l.getValue()) // sort by value descending | ||
.limit(limit) | ||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); | ||
public SimpleOrderedMap<Object> serialize(int limit) { | ||
SimpleOrderedMap<Object> result = new SimpleOrderedMap<>(); | ||
|
||
if (limit < counts.size()) { | ||
result.add("counts", getTopCounts(counts, limit)); | ||
result.add("overflow", Boolean.TRUE); | ||
} else { | ||
return counters; | ||
result.add("counts", counts); | ||
result.add("overflow", Boolean.FALSE); | ||
} | ||
|
||
return result; | ||
} | ||
|
||
private Map<String, Integer> getTopCounts(Map<String, Integer> counters, int limit) { | ||
return counters.entrySet() | ||
.stream() | ||
.sorted((l, r) -> r.getValue() - l.getValue()) // sort by value descending | ||
.limit(limit) | ||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); | ||
} | ||
|
||
public TermFrequencyCounter merge(Map<String, Integer> serialized) { | ||
serialized.forEach((value, freq) -> counters.merge(value, freq, Integer::sum)); | ||
public TermFrequencyCounter merge(NamedList<Object> serialized) { | ||
final Map<String, Integer> counts = (Map<String, Integer>) serialized.get("counts"); | ||
if (counts != null) { | ||
counts.forEach((value, freq) -> this.counts.merge(value, freq, Integer::sum)); | ||
} | ||
|
||
final Boolean overflow = (Boolean) serialized.get("overflow"); | ||
if (overflow != null) { | ||
this.overflow = this.overflow || overflow; | ||
} | ||
|
||
return this; | ||
} | ||
|
||
public SimpleOrderedMap<Object> toFrequencyOfFrequencies() { | ||
SimpleOrderedMap<Object> result = new SimpleOrderedMap<>(); | ||
|
||
Map<Integer, Integer> frequencies = new LinkedHashMap<>(); | ||
counts.forEach((value, freq) -> frequencies.merge(freq, 1, Integer::sum)); | ||
|
||
result.add("frequencies", frequencies); | ||
result.add("overflow", overflow); | ||
|
||
return result; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters