Ability to facet on a function
timatbw committed Aug 3, 2018
1 parent 3cf1da0 commit 14ce9d9
Showing 4 changed files with 367 additions and 79 deletions.
@@ -103,42 +103,7 @@ public Object getMergedResult() {
 
     sortBuckets();
 
-    long first = freq.offset;
-    long end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE;
-    long last = Math.min(sortedBuckets.size(), end);
-
-    List<SimpleOrderedMap> resultBuckets = new ArrayList<>(Math.max(0, (int)(last - first)));
-
-    /** this only works if there are no filters (like mincount)
-    for (int i=first; i<last; i++) {
-      FacetBucket bucket = sortedBuckets.get(i);
-      resultBuckets.add( bucket.getMergedBucket() );
-    }
-    ***/
-
-    // TODO: change effective offsets + limits at shards...
-
-    int off = (int)freq.offset;
-    int lim = freq.limit >= 0 ? (int)freq.limit : Integer.MAX_VALUE;
-    for (FacetBucket bucket : sortedBuckets) {
-      if (bucket.getCount() < freq.mincount) {
-        continue;
-      }
-
-      if (off > 0) {
-        --off;
-        continue;
-      }
-
-      if (resultBuckets.size() >= lim) {
-        break;
-      }
-
-      resultBuckets.add( bucket.getMergedBucket() );
-    }
-
-
-    result.add("buckets", resultBuckets);
+    result.add("buckets", getPaginatedBuckets());
     if (missingBucket != null) {
       result.add("missing", missingBucket.getMergedBucket());
     }
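
The 36 removed lines above inlined the mincount, offset, and limit handling; the single added line delegates to a getPaginatedBuckets() helper, which the new FacetFunctionMerger further down reuses as well. The helper itself is not visible in this excerpt (it presumably lives in the shared FacetRequestSortedMerger base class), so the following is only a sketch reconstructed from the removed lines, not the committed implementation:

// Hypothetical reconstruction of getPaginatedBuckets(), pieced together from the removed logic above.
// The real helper added by this commit is not shown in this excerpt.
List<SimpleOrderedMap> getPaginatedBuckets() {
  int off = (int) freq.offset;
  int lim = freq.limit >= 0 ? (int) freq.limit : Integer.MAX_VALUE;
  List<SimpleOrderedMap> resultBuckets = new ArrayList<>();
  for (FacetBucket bucket : sortedBuckets) {
    if (bucket.getCount() < freq.mincount) {
      continue;                              // drop buckets below mincount
    }
    if (off > 0) {
      --off;                                 // consume the requested offset first
      continue;
    }
    if (resultBuckets.size() >= lim) {
      break;                                 // stop once the limit is filled
    }
    resultBuckets.add(bucket.getMergedBucket());
  }
  return resultBuckets;
}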
solr/core/src/java/org/apache/solr/search/facet/FacetFunction.java (new file, 237 additions, 0 deletions)
@@ -0,0 +1,237 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search.facet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionRangeQuery;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetBuilder;

public class FacetFunction extends FacetRequestSorted {

  ValueSource valueSource;

  public FacetFunction() {
    mincount = 1;
    limit = -1;
  }

  @Override
  public FacetProcessor createFacetProcessor(FacetContext fcontext) {
    return new FacetFunctionProcessor(fcontext, this);
  }

  @Override
  public FacetMerger createFacetMerger(Object prototype) {
    return new FacetFunctionMerger(this);
  }

  @Override
  public Map<String, Object> getFacetDescription() {
    Map<String, Object> descr = new HashMap<>();
    descr.put("function", valueSource.description());
    return descr;
  }
}

class FacetFunctionMerger extends FacetRequestSortedMerger<FacetFunction> {

  public FacetFunctionMerger(FacetFunction freq) {
    super(freq);
  }

  @Override
  public void merge(Object facetResult, Context mcontext) {
    SimpleOrderedMap<Object> facetResultMap = (SimpleOrderedMap)facetResult;
    List<SimpleOrderedMap> bucketList = (List<SimpleOrderedMap>)facetResultMap.get("buckets");
    mergeBucketList(bucketList, mcontext);
  }

  @Override
  public void finish(Context mcontext) {
    // nothing more to do
  }

  @Override
  public Object getMergedResult() {
    SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
    sortBuckets();
    result.add("buckets", getPaginatedBuckets());
    return result;
  }
}

class FacetFunctionProcessor extends FacetProcessor<FacetFunction> {

  protected boolean firstPhase;
  protected int segBase = 0;
  protected FunctionValues functionValues;
  protected Map<Object, Bucket> buckets = new HashMap<>();
  protected Comparator<Bucket> comparator;
  protected boolean isSortingByStat;

  FacetFunctionProcessor(FacetContext fcontext, FacetFunction freq) {
    super(fcontext, freq);
    chooseComparator();
  }

  @Override
  public void process() throws IOException {
    super.process();

    firstPhase = true;
    collect(fcontext.base, 0);

    firstPhase = false;
    ArrayList<Bucket> bucketList = new ArrayList<>();
    for (Bucket bucket : buckets.values()) {
      if (!fcontext.isShard() && bucket.getCount() < freq.mincount) {
        continue;
      }
      Object key = bucket.getKey();
      bucket.response = new SimpleOrderedMap<>();
      bucket.response.add("val", key);

      // We're passing the computed DocSet for the bucket, but we'll also provide a Query to recreate it, although
      // that should only be needed by the excludeTags logic
      Query bucketFilter = new FunctionRangeQuery(freq.valueSource, key.toString(), key.toString(), true, true);
      countAcc = null;
      fillBucket(bucket.response, bucketFilter, bucket.getDocSet(), (fcontext.flags & FacetContext.SKIP_FACET)!=0, fcontext.facetInfo);
      if (isSortingByStat) {
        bucket.sortValue = accMap.get(freq.sortVariable).getValue(0);
      }
      bucketList.add(bucket);
    }

    List<SimpleOrderedMap<Object>> responseBuckets = bucketList.stream()
        .sorted(comparator)
        .skip(fcontext.isShard() ? 0 : freq.offset)
        // TODO refactor calculation of effectiveLimit using overrequest and use here
        .limit(fcontext.isShard() || freq.limit < 0 ? Integer.MAX_VALUE : freq.limit)
        .map(bucket -> bucket.response)
        .collect(Collectors.toList());

    response = new SimpleOrderedMap<>();
    response.add("buckets", responseBuckets);
  }

  private void chooseComparator() {
    if ("count".equals(freq.sortVariable) || fcontext.isShard()) {
      comparator = Bucket.COUNT_COMPARATOR.thenComparing(Bucket.KEY_COMPARATOR);
    } else if ("index".equals(freq.sortVariable)) {
      comparator = Bucket.KEY_COMPARATOR;
    } else if (freq.getFacetStats().containsKey(freq.sortVariable)) {
      comparator = Bucket.SORTSTAT_COMPARATOR.thenComparing(Bucket.KEY_COMPARATOR);
      isSortingByStat = true;
    } else {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "Unknown facet sort value " + freq.sortVariable);
    }
    if (FacetRequest.SortDirection.desc.equals(freq.sortDirection)) {
      comparator = comparator.reversed();
    }
  }

  @Override
  void collect(int segDoc, int slot) throws IOException {
    if (firstPhase) {
      // Sift the documents into buckets, 1 bucket per distinct result value from the function
      Object objectVal = functionValues.objectVal(segDoc);
      if (objectVal == null) {
        return; // TODO consider a 'missing' bucket for these?
      }
      Bucket bucket = buckets.computeIfAbsent(objectVal, key -> new Bucket(key, fcontext.searcher.maxDoc(), fcontext.base.size()));
      bucket.addDoc(segDoc + segBase);
    } else {
      super.collect(segDoc, slot);
    }
  }

  @Override
  void setNextReader(LeafReaderContext readerContext) throws IOException {
    if (firstPhase) {
      segBase = readerContext.docBase;
      functionValues = freq.valueSource.getValues(fcontext.qcontext, readerContext);
    } else {
      super.setNextReader(readerContext);
    }
  }

  static class Bucket {

    static final Comparator<Bucket> KEY_COMPARATOR = (b1, b2) -> ((Comparable)b1.getKey()).compareTo(b2.getKey());
    static final Comparator<Bucket> COUNT_COMPARATOR = (b1, b2) -> Integer.compare(b1.getCount(), b2.getCount());
    static final Comparator<Bucket> SORTSTAT_COMPARATOR = (b1, b2) -> {
      if (b1.sortValue == null || b2.sortValue == null) {
        return 0;
      } else {
        return ((Comparable)b1.sortValue).compareTo(b2.sortValue);
      }
    };

    private final Object key;
    private final DocSetBuilder docSetBuilder;
    private DocSet docSet;
    Object sortValue;
    SimpleOrderedMap<Object> response;

    // maxDoc and parentSize help decide what kind of DocSet to use.
    // parentSize is an upper bound on how many docs will be added to this bucket
    Bucket(Object key, int maxDoc, int parentSize) {
      this.key = key;

      // Crossover point where bitset more space-efficient than sorted ints is maxDoc >> 5
      // i.e. 32 bit int vs 1 bit
      // builder uses >>> 7 on maxDoc as its threshold, hence we'll use >> 2 on our
      // expected upper bound of doc set size
      this.docSetBuilder = new DocSetBuilder(maxDoc, parentSize >> 2);
    }

    Object getKey() {
      return key;
    }

    int getCount() {
      return getDocSet().size();
    }

    void addDoc(int doc) {
      docSetBuilder.add(doc);
    }

    DocSet getDocSet() {
      if (docSet == null) {
        docSet = docSetBuilder.buildUniqueInOrder(null);
      }
      return docSet;
    }
  }
}
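
The sizing comment in the Bucket constructor is easier to follow with concrete numbers. The stand-alone sketch below only illustrates that arithmetic; the index size, parent-domain size, and class name are made up for the example and are not part of the commit.

// Illustrative numbers for the bitset-vs-sorted-ints trade-off described in Bucket's constructor.
// Only the shift amounts come from the commit; every concrete value here is hypothetical.
public class DocSetSizingExample {
  public static void main(String[] args) {
    int maxDoc = 1_000_000;    // documents in the index (assumed)
    int parentSize = 100_000;  // upper bound on docs that can land in one bucket (assumed)

    long bitsetBytes = maxDoc / 8;      // a bitset always costs 1 bit per doc in the index
    long crossoverDocs = maxDoc >> 5;   // past this cardinality, 32-bit sorted ints cost more than the bitset
    long builderHint = parentSize >> 2; // the bound the commit passes as DocSetBuilder's second argument

    System.out.println("bitset cost:           " + bitsetBytes + " bytes");  // 125000
    System.out.println("crossover cardinality: " + crossoverDocs + " docs"); // 31250
    System.out.println("hint given to builder: " + builderHint + " docs");   // 25000
  }
}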