diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
index 4f57bcd90c06..4f8bfd5b52c2 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
@@ -103,42 +103,7 @@ public Object getMergedResult() {
 
     sortBuckets();
 
-    long first = freq.offset;
-    long end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE;
-    long last = Math.min(sortedBuckets.size(), end);
-
-    List<SimpleOrderedMap> resultBuckets = new ArrayList<>(Math.max(0, (int)(last - first)));
-
-    /** this only works if there are no filters (like mincount)
-    for (int i=first; i<last; i++) {
-      FacetBucket bucket = sortedBuckets.get(i);
-      resultBuckets.add( bucket.getMergedBucket() );
-    }
-    **/
-
-    // TODO: change effective offsets + limits at shards...
-
-    int off = (int)freq.offset;
-    int lim = freq.limit >= 0 ? (int)freq.limit : Integer.MAX_VALUE;
-    for (FacetBucket bucket : sortedBuckets) {
-      if (bucket.getCount() < freq.mincount) {
-        continue;
-      }
-
-      if (off > 0) {
-        --off;
-        continue;
-      }
-
-      if (resultBuckets.size() >= lim) {
-        break;
-      }
-
-      resultBuckets.add( bucket.getMergedBucket() );
-    }
-
-
-    result.add("buckets", resultBuckets);
+    result.add("buckets", getPaginatedBuckets());
 
     if (missingBucket != null) {
       result.add("missing", missingBucket.getMergedBucket());
     }
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFunction.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFunction.java
new file mode 100644
index 000000000000..8a1c92589dad
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFunction.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.queries.function.FunctionRangeQuery;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.DocSetBuilder;
+
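+/**
+ * A "function" facet request: the domain is bucketed by the distinct values that an arbitrary
+ * function (ValueSource) produces for each document, with the usual sorted-facet options
+ * (sort, offset, limit, overrequest, mincount) and support for nested sub-facets and stats.
+ */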
+public class FacetFunction extends FacetRequestSorted {
+
+  ValueSource valueSource;
+
+  public FacetFunction() {
+    mincount = 1;
+    limit = -1;
+  }
+
+  @Override
+  public FacetProcessor createFacetProcessor(FacetContext fcontext) {
+    return new FacetFunctionProcessor(fcontext, this);
+  }
+
+  @Override
+  public FacetMerger createFacetMerger(Object prototype) {
+    return new FacetFunctionMerger(this);
+  }
+
+  @Override
+  public Map<String, Object> getFacetDescription() {
+    Map<String, Object> descr = new HashMap<>();
+    descr.put("function", valueSource.description());
+    return descr;
+  }
+}
+
+class FacetFunctionMerger extends FacetRequestSortedMerger<FacetFunction> {
+
+  public FacetFunctionMerger(FacetFunction freq) {
+    super(freq);
+  }
+
+  @Override
+  public void merge(Object facetResult, Context mcontext) {
+    SimpleOrderedMap facetResultMap = (SimpleOrderedMap)facetResult;
+    List<SimpleOrderedMap> bucketList = (List<SimpleOrderedMap>) facetResultMap.get("buckets");
+    mergeBucketList(bucketList, mcontext);
+  }
+
+  @Override
+  public void finish(Context mcontext) {
+    // nothing more to do
+  }
+
+  @Override
+  public Object getMergedResult() {
+    SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
+    sortBuckets();
+    result.add("buckets", getPaginatedBuckets());
+    return result;
+  }
+}
+
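+/**
+ * Computes "function" facet buckets in two phases: first every document in the base set is
+ * sifted into a {@link Bucket} keyed by its function value (see {@link #collect(int, int)}),
+ * then counts, stats and sub-facets are filled in for each surviving bucket via fillBucket().
+ * On a shard, mincount/offset/limit trimming is left to {@link FacetFunctionMerger}.
+ */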
+class FacetFunctionProcessor extends FacetProcessor<FacetFunction> {
+
+  protected boolean firstPhase;
+  protected int segBase = 0;
+  protected FunctionValues functionValues;
+  protected Map<Object, Bucket> buckets = new HashMap<>();
+  protected Comparator<Bucket> comparator;
+  protected boolean isSortingByStat;
+
+  FacetFunctionProcessor(FacetContext fcontext, FacetFunction freq) {
+    super(fcontext, freq);
+    chooseComparator();
+  }
+
+  @Override
+  public void process() throws IOException {
+    super.process();
+
+    firstPhase = true;
+    collect(fcontext.base, 0);
+
+    firstPhase = false;
+    ArrayList<Bucket> bucketList = new ArrayList<>();
+    for (Bucket bucket : buckets.values()) {
+      if (!fcontext.isShard() && bucket.getCount() < freq.mincount) {
+        continue;
+      }
+      Object key = bucket.getKey();
+      bucket.response = new SimpleOrderedMap<>();
+      bucket.response.add("val", key);
+
+      // We're passing the computed DocSet for the bucket, but we'll also provide a Query to recreate it, although
+      // that should only be needed by the excludeTags logic
+      Query bucketFilter = new FunctionRangeQuery(freq.valueSource, key.toString(), key.toString(), true, true);
+      countAcc = null;
+      fillBucket(bucket.response, bucketFilter, bucket.getDocSet(), (fcontext.flags & FacetContext.SKIP_FACET)!=0, fcontext.facetInfo);
+      if (isSortingByStat) {
+        bucket.sortValue = accMap.get(freq.sortVariable).getValue(0);
+      }
+      bucketList.add(bucket);
+    }
+
+    List<SimpleOrderedMap<Object>> responseBuckets = bucketList.stream()
+        .sorted(comparator)
+        .skip(fcontext.isShard() ? 0 : freq.offset)
+        // TODO refactor calculation of effectiveLimit using overrequest and use here
+        .limit(fcontext.isShard() || freq.limit < 0 ? Integer.MAX_VALUE : freq.limit)
+        .map(bucket -> bucket.response)
+        .collect(Collectors.toList());
+
+    response = new SimpleOrderedMap<>();
+    response.add("buckets", responseBuckets);
+  }
+
+  private void chooseComparator() {
+    if ("count".equals(freq.sortVariable) || fcontext.isShard()) {
+      comparator = Bucket.COUNT_COMPARATOR.thenComparing(Bucket.KEY_COMPARATOR);
+    } else if ("index".equals(freq.sortVariable)) {
+      comparator = Bucket.KEY_COMPARATOR;
+    } else if (freq.getFacetStats().containsKey(freq.sortVariable)) {
+      comparator = Bucket.SORTSTAT_COMPARATOR.thenComparing(Bucket.KEY_COMPARATOR);
+      isSortingByStat = true;
+    } else {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          "Unknown facet sort value " + freq.sortVariable);
+    }
+    if (FacetRequest.SortDirection.desc.equals(freq.sortDirection)) {
+      comparator = comparator.reversed();
+    }
+  }
+
+  @Override
+  void collect(int segDoc, int slot) throws IOException {
+    if (firstPhase) {
+      // Sift the documents into buckets, 1 bucket per distinct result value from the function
+      Object objectVal = functionValues.objectVal(segDoc);
+      if (objectVal == null) {
+        return; // TODO consider a 'missing' bucket for these?
+      }
+      Bucket bucket = buckets.computeIfAbsent(objectVal, key -> new Bucket(key, fcontext.searcher.maxDoc(), fcontext.base.size()));
+      bucket.addDoc(segDoc + segBase);
+    } else {
+      super.collect(segDoc, slot);
+    }
+  }
+
+  @Override
+  void setNextReader(LeafReaderContext readerContext) throws IOException {
+    if (firstPhase) {
+      segBase = readerContext.docBase;
+      functionValues = freq.valueSource.getValues(fcontext.qcontext, readerContext);
+    } else {
+      super.setNextReader(readerContext);
+    }
+  }
+
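+  /**
+   * Accumulates the documents that share one distinct function value.  Docs are buffered in a
+   * DocSetBuilder and only materialized into a DocSet when first needed; sortValue and response
+   * are filled in during the second phase of process().
+   */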
+  static class Bucket {
+
+    static final Comparator<Bucket> KEY_COMPARATOR = (b1, b2) -> ((Comparable)b1.getKey()).compareTo(b2.getKey());
+    static final Comparator<Bucket> COUNT_COMPARATOR = (b1, b2) -> Integer.compare(b1.getCount(), b2.getCount());
+    static final Comparator<Bucket> SORTSTAT_COMPARATOR = (b1, b2) -> {
+      if (b1.sortValue == null || b2.sortValue == null) {
+        return 0;
+      } else {
+        return ((Comparable)b1.sortValue).compareTo(b2.sortValue);
+      }
+    };
+
+    private final Object key;
+    private final DocSetBuilder docSetBuilder;
+    private DocSet docSet;
+    Object sortValue;
+    SimpleOrderedMap<Object> response;
+
+    // maxDoc and parentSize help decide what kind of DocSet to use.
+    // parentSize is an upper bound on how many docs will be added to this bucket
+    Bucket(Object key, int maxDoc, int parentSize) {
+      this.key = key;
+
+      // Crossover point where bitset more space-efficient than sorted ints is maxDoc >> 5
+      // i.e. 32 bit int vs 1 bit
+      // builder uses >>> 7 on maxDoc as its threshold, hence we'll use >> 2 on our
+      // expected upper bound of doc set size
+      this.docSetBuilder = new DocSetBuilder(maxDoc, parentSize >> 2);
+    }
+
+    Object getKey() {
+      return key;
+    }
+
+    int getCount() {
+      return getDocSet().size();
+    }
+
+    void addDoc(int doc) {
+      docSetBuilder.add(doc);
+    }
+
+    DocSet getDocSet() {
+      if (docSet == null) {
+        docSet = docSetBuilder.buildUniqueInOrder(null);
+      }
+      return docSet;
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
index 4cf8a6897789..2438c1711328 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
@@ -391,6 +391,8 @@ public Object parseFacetOrStat(String key, String type, Object args) throws Synt
       return parseQueryFacet(key, args);
     } else if ("range".equals(type)) {
       return parseRangeFacet(key, args);
+    } else if ("function".equals(type)) {
+      return parseFunctionFacet(key, args);
     }
 
     AggValueSource stat = parseStat(key, type, args);
@@ -417,6 +419,11 @@ FacetRange parseRangeFacet(String key, Object args) throws SyntaxError {
     return parser.parse(args);
   }
 
+  FacetFunction parseFunctionFacet(String key, Object args) throws SyntaxError {
+    FacetFunctionParser parser = new FacetFunctionParser(this, key);
+    return parser.parse(args);
+  }
+
   public Object parseStringFacetOrStat(String key, String s) throws SyntaxError {
     // "avg(myfield)"
     return parseStringStat(key, s);
@@ -584,6 +591,47 @@ public SolrQueryRequest getSolrRequest() {
     return parent.getSolrRequest();
   }
 
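+  // Offset/limit/overrequest/mincount handling shared by the field and function facet parsers.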
+  protected void parsePagination(FacetRequestSorted sortedFacet, Map<String, Object> m) {
+    sortedFacet.offset = getLong(m, "offset", sortedFacet.offset);
+    sortedFacet.limit = getLong(m, "limit", sortedFacet.limit);
+    sortedFacet.overrequest = (int) getLong(m, "overrequest", sortedFacet.overrequest);
+    if (sortedFacet.limit == 0) sortedFacet.offset = 0;  // normalize. an offset with a limit of non-zero isn't useful.
+    sortedFacet.mincount = getLong(m, "mincount", sortedFacet.mincount);
+  }
+
+  // Sort specification is currently
+  //   sort : 'mystat desc'
+  // OR
+  //   sort : { mystat : 'desc' }
+  protected void parseSort(FacetRequestSorted sortedFacet, Object sort) {
+    if (sort == null) {
+      sortedFacet.sortVariable = "count";
+      sortedFacet.sortDirection = FacetRequest.SortDirection.desc;
+    } else if (sort instanceof String) {
+      String sortStr = (String)sort;
+      if (sortStr.endsWith(" asc")) {
+        sortedFacet.sortVariable = sortStr.substring(0, sortStr.length()-" asc".length());
+        sortedFacet.sortDirection = FacetRequest.SortDirection.asc;
+      } else if (sortStr.endsWith(" desc")) {
+        sortedFacet.sortVariable = sortStr.substring(0, sortStr.length()-" desc".length());
+        sortedFacet.sortDirection = FacetRequest.SortDirection.desc;
+      } else {
+        sortedFacet.sortVariable = sortStr;
+        sortedFacet.sortDirection = "index".equals(sortedFacet.sortVariable) ? FacetRequest.SortDirection.asc : FacetRequest.SortDirection.desc;  // default direction for "index" is ascending
+      }
+    } else {
+      // sort : { myvar : 'desc' }
+      Map<String, Object> map = (Map<String, Object>)sort;
+      // TODO: validate
+      Map.Entry<String, Object> entry = map.entrySet().iterator().next();
+      String k = entry.getKey();
+      Object v = entry.getValue();
+      sortedFacet.sortVariable = k;
+      sortedFacet.sortDirection = FacetRequest.SortDirection.valueOf(v.toString());
+    }
+
+  }
+
 }
@@ -690,74 +738,38 @@ public FacetField parse(Object arg) throws SyntaxError {
     parseCommonParams(arg);
     if (arg instanceof String) {
       // just the field name...
-      facet.field = (String)arg;
-      parseSort( null );  // TODO: defaults
+      facet.field = (String) arg;
+      parseSort(facet,null); // TODO: defaults
     } else if (arg instanceof Map) {
       Map<String, Object> m = (Map<String, Object>) arg;
       facet.field = getField(m);
-      facet.offset = getLong(m, "offset", facet.offset);
-      facet.limit = getLong(m, "limit", facet.limit);
-      facet.overrequest = (int) getLong(m, "overrequest", facet.overrequest);
-      if (facet.limit == 0) facet.offset = 0; // normalize. an offset with a limit of non-zero isn't useful.
-      facet.mincount = getLong(m, "mincount", facet.mincount);
+      parsePagination(facet, m);
       facet.missing = getBoolean(m, "missing", facet.missing);
       facet.numBuckets = getBoolean(m, "numBuckets", facet.numBuckets);
       facet.prefix = getString(m, "prefix", facet.prefix);
       facet.allBuckets = getBoolean(m, "allBuckets", facet.allBuckets);
       facet.method = FacetField.FacetMethod.fromString(getString(m, "method", null));
-      facet.cacheDf = (int)getLong(m, "cacheDf", facet.cacheDf);
+      facet.cacheDf = (int) getLong(m, "cacheDf", facet.cacheDf);
 
       // TODO: pull up to higher level?
       facet.refine = FacetField.RefineMethod.fromObj(m.get("refine"));
 
-      facet.perSeg = (Boolean)m.get("perSeg");
+      facet.perSeg = (Boolean) m.get("perSeg");
 
       // facet.sort may depend on a facet stat...
       // should we be parsing / validating this here, or in the execution environment?
       Object o = m.get("facet");
       parseSubs(o);
 
-      parseSort( m.get(SORT) );
+      parseSort(facet, m.get(SORT));
     }
 
     return facet;
   }
+}
-
-  // Sort specification is currently
-  //   sort : 'mystat desc'
-  // OR
-  //   sort : { mystat : 'desc' }
-  private void parseSort(Object sort) {
-    if (sort == null) {
-      facet.sortVariable = "count";
-      facet.sortDirection = FacetRequest.SortDirection.desc;
-    } else if (sort instanceof String) {
-      String sortStr = (String)sort;
-      if (sortStr.endsWith(" asc")) {
-        facet.sortVariable = sortStr.substring(0, sortStr.length()-" asc".length());
-        facet.sortDirection = FacetRequest.SortDirection.asc;
-      } else if (sortStr.endsWith(" desc")) {
-        facet.sortVariable = sortStr.substring(0, sortStr.length()-" desc".length());
-        facet.sortDirection = FacetRequest.SortDirection.desc;
-      } else {
-        facet.sortVariable = sortStr;
-        facet.sortDirection = "index".equals(facet.sortVariable) ? FacetRequest.SortDirection.asc : FacetRequest.SortDirection.desc;  // default direction for "index" is ascending
-      }
-    } else {
-      // sort : { myvar : 'desc' }
-      Map<String, Object> map = (Map<String, Object>)sort;
-      // TODO: validate
-      Map.Entry<String, Object> entry = map.entrySet().iterator().next();
-      String k = entry.getKey();
-      Object v = entry.getValue();
-      facet.sortVariable = k;
-      facet.sortDirection = FacetRequest.SortDirection.valueOf(v.toString());
-    }
-
-  }
-}
@@ -829,3 +841,40 @@ public FacetRange parse(Object arg) throws SyntaxError {
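+/**
+ * Parses a "function" facet request.  Accepts either a bare function string, e.g.
+ * {@code "sqrt(popularity)"} (field name illustrative), or a map with "f"/"function" plus the
+ * usual sort/offset/limit/overrequest/mincount options and nested "facet" entries.
+ */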
+class FacetFunctionParser extends FacetParser<FacetFunction> {
+  public FacetFunctionParser(FacetParser parent, String key) {
+    super(parent, key);
+    facet = new FacetFunction();
+  }
+
+  @Override
+  public FacetFunction parse(Object arg) throws SyntaxError {
+    parseCommonParams(arg);
+
+    String fstring = null;
+    if (arg instanceof String) {
+      fstring = (String)arg;
+
+    } else if (arg instanceof Map) {
+      Map<String, Object> m = (Map<String, Object>) arg;
+      fstring = getString(m, "f", null);
+      if (fstring == null) {
+        fstring = getString(m, "function", null);
+      }
+
+      parsePagination(facet, m);
+
+      parseSubs( m.get("facet") );
+
+      parseSort(facet, m.get(SORT));
+    }
+
+    if (fstring != null) {
+      FunctionQParser parser = (FunctionQParser)QParser.getParser(fstring, FunctionQParserPlugin.NAME, getSolrRequest());
+      parser.setIsFilter(true);
+      facet.valueSource = parser.parseValueSource();
+    }
+
+    return facet;
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java
index 9ffdea7835a3..222917e26a63 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java
@@ -139,6 +139,43 @@ public int compare(int a, int b) {
       }
     }
 
+  protected List<SimpleOrderedMap> getPaginatedBuckets() {
+    long first = freq.offset;
+    long end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE;
+    long last = Math.min(sortedBuckets.size(), end);
+
+    List<SimpleOrderedMap> resultBuckets = new ArrayList<>(Math.max(0, (int)(last - first)));
+
+    /** this only works if there are no filters (like mincount)
+    for (int i=first; i<last; i++) {
+      FacetBucket bucket = sortedBuckets.get(i);
+      resultBuckets.add( bucket.getMergedBucket() );
+    }
+    **/
+
+    // TODO: change effective offsets + limits at shards...
+
+    int off = (int)freq.offset;
+    int lim = freq.limit >= 0 ? (int)freq.limit : Integer.MAX_VALUE;
+    for (FacetBucket bucket : sortedBuckets) {
+      if (bucket.getCount() < freq.mincount) {
+        continue;
+      }
+
+      if (off > 0) {
+        --off;
+        continue;
+      }
+
+      if (resultBuckets.size() >= lim) {
+        break;
+      }
+
+      resultBuckets.add( bucket.getMergedBucket() );
+    }
+
+    return resultBuckets;
+  }
 
   @Override
   public Map<String, Object> getRefinement(Context mcontext) {
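
For orientation: based on the keys FacetFunctionParser reads above ("type"/"function"/"f", sort, offset, limit, overrequest, mincount, nested "facet"), a JSON Facet request using the new facet type would look roughly like the sketch below. The facet name "by_value" and the fields "popularity"/"price" are illustrative placeholders, not taken from the patch.

  json.facet = {
    by_value : {
      type     : "function",
      function : "sqrt(popularity)",    // "f" is accepted as an alias for "function"
      sort     : "count desc",
      limit    : 10,
      mincount : 2,
      facet    : { avg_price : "avg(price)" }
    }
  }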