Skip to content

Commit

Permalink
Add a method:dv option to the JSON Range Facet, which should be faster for large …
Browse files Browse the repository at this point in the history
…numbers of buckets
  • Loading branch information
timatbw committed May 18, 2018
1 parent d217b2e commit 1aede48
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 10 deletions.
185 changes: 175 additions & 10 deletions solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,18 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
Expand All @@ -35,18 +40,41 @@
import org.apache.solr.schema.TrieDateField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetBuilder;
import org.apache.solr.search.DocSetUtil;
import org.apache.solr.util.DateMathParser;

import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;

public class FacetRange extends FacetRequestSorted {

// Selects how the buckets of a range facet are populated.
public enum FacetMethod {
  // Does a single pass using DocValues to sift documents into buckets
  DV,
  // Uses a RangeQuery for each bucket
  ENUM;

  // Maps the request's method string onto a FacetMethod.
  // A null or empty string selects ENUM; "dv" and "enum" (exact, case-sensitive
  // match) select the corresponding constant; anything else is rejected
  // with a BAD_REQUEST SolrException.
  public static FacetRange.FacetMethod fromString(String method) {
    if (method == null || method.isEmpty()) {
      return ENUM;
    }
    if ("dv".equals(method)) {
      return DV;
    }
    if ("enum".equals(method)) {
      return ENUM;
    }
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown FacetRange method " + method);
  }
}


String field;
Object start;
Object end;
Object gap;
boolean hardend = false;
EnumSet<FacetParams.FacetRangeInclude> include;
EnumSet<FacetParams.FacetRangeOther> others;
FacetMethod method;

{
// defaults
Expand Down Expand Up @@ -114,6 +142,26 @@ public Range(Object label, Comparable low, Comparable high, boolean includeLower
this.includeLower = includeLower;
this.includeUpper = includeUpper;
}

// Reports whether val lies within this range.
// A null bound means that side is unbounded; includeLower / includeUpper decide
// whether a value exactly equal to the corresponding bound counts as inside.
public boolean contains(Comparable val) {
  if (low != null) {
    final int cmpLow = val.compareTo(low);
    // inclusive lower bound rejects only strictly-smaller values; exclusive also rejects equality
    final boolean belowRange = includeLower ? cmpLow < 0 : cmpLow <= 0;
    if (belowRange) {
      return false;
    }
  }

  if (high != null) {
    final int cmpHigh = val.compareTo(high);
    // inclusive upper bound rejects only strictly-larger values; exclusive also rejects equality
    final boolean aboveRange = includeUpper ? cmpHigh > 0 : cmpHigh >= 0;
    if (aboveRange) {
      return false;
    }
  }

  return true;
}
}

public static Calc getNumericCalc(SchemaField sf) {
Expand Down Expand Up @@ -304,14 +352,26 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {

createAccs(fcontext.base.size(), slotCount);

FacetRangeMethod rangeMethod;
if (freq.method == FacetRange.FacetMethod.DV) {
if (!sf.hasDocValues() || sf.multiValued()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Facet range method " + freq.method + " only works for single valued numeric fields with docValues");
}
rangeMethod = new FacetRangeByDocValues();
} else {
rangeMethod = new FacetRangeByQuery();
}

for (int idx = 0; idx<rangeList.size(); idx++) {
rangeStats(rangeList.get(idx), idx);
rangeMethod.processRange(rangeList.get(idx), idx);
}

for (int idx = 0; idx<otherList.size(); idx++) {
rangeStats(otherList.get(idx), rangeList.size() + idx);
rangeMethod.processRange(otherList.get(idx), rangeList.size() + idx);
}

rangeMethod.finish();

final SimpleOrderedMap res = new SimpleOrderedMap<>();
List<SimpleOrderedMap> buckets = new ArrayList<>();
Expand Down Expand Up @@ -341,14 +401,9 @@ private SimpleOrderedMap getRangeCountsIndexed() throws IOException {

private Query[] filters;
private DocSet[] intersections;
private void rangeStats(Range range, int slot) throws IOException {
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
// TODO: specialize count only
DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base);
filters[slot] = rangeQ;
intersections[slot] = intersection; // save for later // TODO: only save if number of slots is small enough?
int num = collect(intersection, slot);
countAcc.incrementCount(slot, num); // TODO: roll this into collect()

// Asks the field's type for the range query matching one bucket.
// Each non-null bound is formatted via calc.formatValue; a null bound is passed through as null.
private Query buildRangeQuery(Range range) {
  return sf.getType().getRangeQuery(
      null,
      sf,
      range.low != null ? calc.formatValue(range.low) : null,
      range.high != null ? calc.formatValue(range.high) : null,
      range.includeLower,
      range.includeUpper);
}

private void doSubs(SimpleOrderedMap bucket, int slot) throws IOException {
Expand Down Expand Up @@ -378,8 +433,118 @@ private SimpleOrderedMap<Object> rangeStats(Range range, boolean special ) thro
return bucket;
}

// Strategy for gathering the per-bucket stats of a range facet.
// processRange is called once per bucket slot, then finish is called once
// after every bucket has been processed.
abstract class FacetRangeMethod {

  // Handles one bucket; filters[slot] already holds the bucket's range query when this runs.
  abstract void doOneRange(Range range, int slot) throws IOException;

  // Called after all buckets have been passed to processRange.
  abstract void finish() throws IOException;

  // Records the bucket's range query in filters[slot], then delegates to doOneRange.
  void processRange(Range range, int slot) throws IOException {
    final Query rangeQuery = buildRangeQuery(range);
    filters[slot] = rangeQuery;
    doOneRange(range, slot);
  }
}

// Gathers the stats for each Range bucket by running that bucket's range query
// against the base DocSet.
// Suitable when the number of buckets is fairly low, or the base DocSet is big.
class FacetRangeByQuery extends FacetRangeMethod {

  // Intersects the bucket's query with the base DocSet, keeps the result in
  // intersections[slot], and feeds it to the accumulators.
  @Override
  void doOneRange(Range range, int slot) throws IOException {
    // TODO: specialize count only
    final DocSet matching = fcontext.searcher.getDocSet(filters[slot], fcontext.base);
    intersections[slot] = matching;
    final int num = collect(matching, slot);
    countAcc.incrementCount(slot, num); // TODO: roll this into collect()
  }

  // All work is done per bucket in doOneRange, so there is nothing left to do here.
  @Override
  void finish() throws IOException {
  }
}

// Gathers the stats by making a single pass over the base DocSet, using
// the docValue for the field to sift each document into the appropriate Range bucket(s).
// Suitable when the gap leads to many interval buckets, especially if this is a
// subfacet inside a parent with many buckets of its own. However, this method
// can be slower if the base DocSet is big.
class FacetRangeByDocValues extends FacetRangeMethod {

  // One builder per slot (sized like intersections); each accumulates the doc ids of its bucket.
  private final DocSetBuilder[] builders;
  // Lower bound of each rangeList bucket, indexed by slot; binary-searched in placeDocId.
  // NOTE(review): the binary search assumes rangeList is in ascending order of low - confirm.
  private final Comparable[] starts;

  FacetRangeByDocValues() {
    builders = new DocSetBuilder[intersections.length];
    starts = new Comparable[rangeList.size()];
  }

  // Only prepares per-slot state; the actual sifting happens in finish().
  @Override
  void doOneRange(Range range, int slot) throws IOException {
    builders[slot] = new DocSetBuilder(fcontext.searcher.maxDoc(), fcontext.base.size() >> 2);
    // Slots at or beyond rangeList.size() belong to otherList and have no entry in starts
    if (slot < starts.length) {
      starts[slot] = range.low;
    }
  }

  // Walks the base DocSet once, placing every document that has a value into its bucket(s),
  // then builds each slot's DocSet and feeds it to the accumulators.
  @Override
  void finish() throws IOException {
    DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
      int docBase;
      NumericDocValues values = null;

      @Override
      public boolean needsScores() {
        return false;
      }

      @Override
      protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
        docBase = ctx.docBase;
        values = DocValues.getNumeric(ctx.reader(), sf.getName());
      }

      @Override
      public void collect(int segDoc) throws IOException {
        // Documents without a value for the field are not placed in any bucket
        if (values.advanceExact(segDoc)) {
          placeDocId(values.longValue(), docBase + segDoc);
        }
      }
    });

    for (int slot = 0; slot < builders.length; slot++) {
      intersections[slot] = builders[slot].buildUniqueInOrder(null);
      int num = collect(intersections[slot], slot);
      countAcc.incrementCount(slot, num);
    }
  }

  // Adds docId to every bucket whose Range contains the given docValue.
  void placeDocId(long val, int docId) {
    Comparable comparableVal = calc.bitsToValue(val);

    int insertionPoint = Arrays.binarySearch(starts, comparableVal);

    if (insertionPoint >= 0) {
      // The value sits exactly on the lower bound of bucket insertionPoint. Depending on
      // the include flags it can belong to that bucket, to the preceding one, to both
      // (lower and upper both inclusive), or to neither, so both candidates are checked
      // rather than picking just one of them.
      addIfContained(insertionPoint, comparableVal, docId);
      addIfContained(insertionPoint - 1, comparableVal, docId);
    } else {
      // Not on a boundary: the only candidate is the bucket whose start precedes the value.
      // See docs for binarySearch return value
      addIfContained(-(insertionPoint + 2), comparableVal, docId);
    }

    // Also add to any relevant Ranges in the otherList
    int slot = rangeList.size();
    for (Range range : otherList) {
      if (range.contains(comparableVal)) {
        builders[slot].add(docId);
      }
      slot++;
    }
  }

  // Adds docId to the rangeList bucket at the given slot when the slot exists and its Range
  // contains the value; the value may lie outside every bucket, in which case nothing is added.
  private void addIfContained(int slot, Comparable val, int docId) {
    if (slot >= 0 && slot < rangeList.size() && rangeList.get(slot).contains(val)) {
      builders[slot].add(docId);
    }
  }
}

// Essentially copied from SimpleFacets...
// would be nice to unify this stuff w/ analytics component...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,7 @@ public FacetRange parse(Object arg) throws SyntaxError {
facet.gap = m.get("gap");
facet.hardend = getBoolean(m, "hardend", facet.hardend);
facet.mincount = getLong(m, "mincount", 0);
facet.method = FacetRange.FacetMethod.fromString(getString(m, "method", null));

// TODO: refactor list-of-options code

Expand Down

0 comments on commit 1aede48

Please sign in to comment.