From d8bc9bcfcf772dfbfb6aa39dc019b0787946707a Mon Sep 17 00:00:00 2001 From: Mikhail Khludnev Date: Sun, 2 Feb 2020 15:15:17 +0300 Subject: [PATCH] SOLR-12325: uniqueBlock(\{!v=foo:bar}) --- solr/CHANGES.txt | 2 + .../apache/solr/search/ValueSourceParser.java | 8 ++- .../solr/search/facet/UniqueBlockAgg.java | 32 +++------ .../search/facet/UniqueBlockFieldAgg.java | 45 ++++++++++++ .../search/facet/UniqueBlockQueryAgg.java | 71 +++++++++++++++++++ .../solr/search/facet/TestJsonFacets.java | 41 ++++++----- .../TestJsonFacetsWithNestedObjects.java | 31 +++++--- solr/solr-ref-guide/src/json-facet-api.adoc | 4 +- 8 files changed, 182 insertions(+), 52 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/facet/UniqueBlockFieldAgg.java create mode 100644 solr/core/src/java/org/apache/solr/search/facet/UniqueBlockQueryAgg.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 40bd76cecb5..7fc4e1d8246 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -168,6 +168,8 @@ New Features * SOLR-13892: New "top-level" docValues join implementation (Jason Gerlowski, Joel Bernstein) + * SOLR-12325: Introducing uniqueBlock({!v=type:parent}) aggregation (Anatolii Siuniaev via Mikhail Khludnev) + Improvements --------------------- * SOLR-14120: Define JavaScript methods 'includes' and 'startsWith' to ensure AdminUI can be displayed when using diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index 64cadb6843f..d054bc8d886 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -67,7 +67,8 @@ import org.apache.solr.search.facet.SumAgg; import org.apache.solr.search.facet.SumsqAgg; import org.apache.solr.search.facet.UniqueAgg; -import org.apache.solr.search.facet.UniqueBlockAgg; +import org.apache.solr.search.facet.UniqueBlockFieldAgg; +import org.apache.solr.search.facet.UniqueBlockQueryAgg; import org.apache.solr.search.facet.VarianceAgg; import org.apache.solr.search.function.CollapseScoreFunction; import org.apache.solr.search.function.ConcatStringFunction; @@ -971,7 +972,10 @@ public ValueSource parse(FunctionQParser fp) throws SyntaxError { addParser("agg_uniqueBlock", new ValueSourceParser() { @Override public ValueSource parse(FunctionQParser fp) throws SyntaxError { - return new UniqueBlockAgg(fp.parseArg()); + if (fp.sp.peek() == QueryParsing.LOCALPARAM_START.charAt(0) ) { + return new UniqueBlockQueryAgg(fp.parseNestedQuery()); + } + return new UniqueBlockFieldAgg(fp.parseArg()); } }); diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java index 42ddbb5f2e8..77dfa643b12 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockAgg.java @@ -21,14 +21,14 @@ import org.apache.solr.schema.SchemaField; -public class UniqueBlockAgg extends UniqueAgg { +public abstract class UniqueBlockAgg extends UniqueAgg { - private static final class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc { - - private int lastSeenValuesPerSlot[]; - - private UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots) - throws IOException { // + protected static class UniqueBlockSlotAcc extends UniqueSinglevaluedSlotAcc { + + protected int[] lastSeenValuesPerSlot; + + protected UniqueBlockSlotAcc(FacetContext fcontext, SchemaField field, int numSlots) + throws IOException { // super(fcontext, field, /*numSlots suppressing inherited accumulator */0, null); counts = new int[numSlots]; lastSeenValuesPerSlot = new int[numSlots]; @@ -70,25 +70,11 @@ public Object getValue(int slot) throws IOException { public UniqueBlockAgg(String field) { super(field); - name= uniqueBlock; + name = uniqueBlock; } @Override - public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { - final String fieldName = getArg(); - SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName); - if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { - throw new IllegalArgumentException(uniqueBlock+"("+fieldName+ - ") doesn't allow multivalue fields, got " + sf); - } else { - if (sf.getType().getNumberType() != null) { - throw new IllegalArgumentException(uniqueBlock+"("+fieldName+ - ") not yet support numbers " + sf); - } else { - return new UniqueBlockSlotAcc(fcontext, sf, numSlots); - } - } - } + public abstract SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException ; @Override public FacetMerger createFacetMerger(Object prototype) { diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockFieldAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockFieldAgg.java new file mode 100644 index 00000000000..3f7f949341b --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockFieldAgg.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search.facet; + +import java.io.IOException; + +import org.apache.solr.schema.SchemaField; + +public class UniqueBlockFieldAgg extends UniqueBlockAgg { + + public UniqueBlockFieldAgg(String field) { + super(field); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + final String fieldName = getArg(); + SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(fieldName); + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + throw new IllegalArgumentException(name+"("+fieldName+ + ") doesn't allow multivalue fields, got " + sf); + } else { + if (sf.getType().getNumberType() != null) { + throw new IllegalArgumentException(name+"("+fieldName+ + ") not yet support numbers " + sf); + } else { + return new UniqueBlockSlotAcc(fcontext, sf, numSlots); + } + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockQueryAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockQueryAgg.java new file mode 100644 index 00000000000..3cc46d8e3c8 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueBlockQueryAgg.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.facet; + +import java.io.IOException; +import java.util.function.IntFunction; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.BitSet; + +import static org.apache.solr.search.join.BlockJoinParentQParser.getCachedFilter; + +public class UniqueBlockQueryAgg extends UniqueBlockAgg { + + private static final class UniqueBlockQuerySlotAcc extends UniqueBlockSlotAcc { + + private Query query; + private BitSet parentBitSet; + + private UniqueBlockQuerySlotAcc(FacetContext fcontext, Query query, int numSlots) + throws IOException { // + super(fcontext, null, numSlots); + this.query = query; + } + + @Override + public void setNextReader(LeafReaderContext readerContext) throws IOException { + this.parentBitSet = getCachedFilter(fcontext.req, query).getFilter().getBitSet(readerContext); + } + + @Override + public void collect(int doc, int slotNum, IntFunction slotContext) { + if (parentBitSet != null) { + int ord = parentBitSet.nextSetBit(doc); + if (ord != DocIdSetIterator.NO_MORE_DOCS) { + collectOrdToSlot(slotNum, ord); + } + } + } + } + + final private Query query; + + public UniqueBlockQueryAgg(Query query) { + super(null); + this.query = query; + arg = query.toString(); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + return new UniqueBlockQuerySlotAcc(fcontext, query, numSlots); + } +} diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index 3a6c694098c..ed566e32e70 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -3149,14 +3149,18 @@ public void testUniquesForMethod() throws Exception { parent = sdoc("id", "2", "type_s","book", "book_s","B", "v_t","q w"); parent.addChildDocument( sdoc("id","2.1", "type_s","page", "page_s","a", "v_t","x y z") ); - parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","b", "v_t","x y ") ); - parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","c", "v_t"," y z" ) ); + parent.addChildDocument( sdoc("id","2.2", "type_s","page", "page_s","a", "v_t","x1 z") ); + parent.addChildDocument( sdoc("id","2.3", "type_s","page", "page_s","a", "v_t","x2 z") ); + parent.addChildDocument( sdoc("id","2.4", "type_s","page", "page_s","b", "v_t","x y ") ); + parent.addChildDocument( sdoc("id","2.5", "type_s","page", "page_s","c", "v_t"," y z" ) ); + parent.addChildDocument( sdoc("id","2.6", "type_s","page", "page_s","c", "v_t"," z" ) ); client.add(parent, null); parent = sdoc("id", "3", "type_s","book", "book_s","C", "v_t","q w e"); - parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","d", "v_t","x ") ); - parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","e", "v_t"," y ") ); - parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","f", "v_t"," z") ); + parent.addChildDocument( sdoc("id","3.1", "type_s","page", "page_s","b", "v_t","x y ") ); + parent.addChildDocument( sdoc("id","3.2", "type_s","page", "page_s","d", "v_t","x ") ); + parent.addChildDocument( sdoc("id","3.3", "type_s","page", "page_s","e", "v_t"," y ") ); + parent.addChildDocument( sdoc("id","3.4", "type_s","page", "page_s","f", "v_t"," z") ); client.add(parent, null); parent = sdoc("id", "4", "type_s","book", "book_s","D", "v_t","e"); @@ -3171,35 +3175,38 @@ public void testUniquesForMethod() throws Exception { " field:type_s," + " limit:-1," + " facet: {" + - " in_books: \"unique(_root_)\" }"+ + " in_books: \"unique(_root_)\"," + + " via_field:\"uniqueBlock(_root_)\","+ + " via_query:\"uniqueBlock({!v=type_s:book})\" }"+ " }," + " pages: {" + " type:terms," + " field:page_s," + " limit:-1," + " facet: {" + - " in_books: \"uniqueBlock(_root_)\" }"+ + " in_books: \"unique(_root_)\"," + + " via_field:\"uniqueBlock(_root_)\","+ + " via_query:\"uniqueBlock({!v=type_s:book})\" }"+ " }" + "}" ) - , "response=={numFound:6,start:0,docs:[]}" - , "facets=={ count:6," + + , "response=={numFound:10,start:0,docs:[]}" + , "facets=={ count:10," + "types:{" + - " buckets:[ {val:page, count:6, in_books:2} ]}" + + " buckets:[ {val:page, count:10, in_books:2, via_field:2, via_query:2 } ]}" + "pages:{" + " buckets:[ " + - " {val:a, count:1, in_books:1}," + - " {val:b, count:1, in_books:1}," + - " {val:c, count:1, in_books:1}," + - " {val:d, count:1, in_books:1}," + - " {val:e, count:1, in_books:1}," + - " {val:f, count:1, in_books:1}" + + " {val:a, count:3, in_books:1, via_field:1, via_query:1}," + + " {val:b, count:2, in_books:2, via_field:2, via_query:2}," + + " {val:c, count:2, in_books:1, via_field:1, via_query:1}," + + " {val:d, count:1, in_books:1, via_field:1, via_query:1}," + + " {val:e, count:1, in_books:1, via_field:1, via_query:1}," + + " {val:f, count:1, in_books:1, via_field:1, via_query:1}" + " ]}" + "}" ); } - /** * Similar to {@link #testBlockJoin} but uses query time joining. *

diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java index cb8b71a9ee0..6e05491bb34 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetsWithNestedObjects.java @@ -61,7 +61,9 @@ private static void indexBooksAndReviews() throws Exception { "author_s", "dan", "comment_t", "This book was too long.")); client.add(book1, null); - + if (rarely()) { + client.commit(); + } SolrInputDocument book2 = sdoc( "id", "book2", "type_s", "book", @@ -338,25 +340,36 @@ public void testDomainFilterExclusionsInFilters() throws Exception { public void testUniqueBlock() throws Exception { final Client client = Client.localClient(); ModifiableSolrParams p = params("rows","0"); + + // unique block using field and query logic client.testJQ(params(p, "q", "{!parent tag=top which=type_s:book v=$childquery}" , "childquery", "comment_t:*" - , "fl", "id", "fl" , "title_t" + , "fl", "id", "fl" , "title_t" + , "root", "_root_" + , "parentQuery", "type_s:book" , "json.facet", "{" + " types: {" + - " domain: { blockChildren:\"type_s:book\"" + - " }," + + " domain: { blockChildren:\"type_s:book\"" + + " }," + " type:terms," + - " field:type_s," - + " limit:-1," + + " field:type_s," + + " limit:-1," + " facet: {" + - " in_books: \"uniqueBlock(_root_)\" }"+//}}," + + " in_books1: \"uniqueBlock(_root_)\"," + // field logic + " in_books2: \"uniqueBlock($root)\"," + // field reference logic + " via_query1:\"uniqueBlock({!v=type_s:book})\", " + // query logic + " via_query2:\"uniqueBlock({!v=$parentQuery})\" ," + // query reference logic + " partial_query:\"uniqueBlock({!v=cat_s:fantasy})\" ," + // first doc hit only, never count afterwards + " query_no_match:\"uniqueBlock({!v=cat_s:horor})\" }" + " }" + - "}" ) + "}" ) , "response=={numFound:2,start:0,docs:[]}" , "facets=={ count:2," + "types:{" + - " buckets:[ {val:review, count:5, in_books:2} ]}" + + " buckets:[ {val:review, count:5, in_books1:2, in_books2:2, " + + " via_query1:2, via_query2:2, " + + " partial_query:1, query_no_match:0} ]}" + "}" ); } diff --git a/solr/solr-ref-guide/src/json-facet-api.adoc b/solr/solr-ref-guide/src/json-facet-api.adoc index 9c944fdfefa..1d2bc8adab9 100644 --- a/solr/solr-ref-guide/src/json-facet-api.adoc +++ b/solr/solr-ref-guide/src/json-facet-api.adoc @@ -572,7 +572,8 @@ Unlike all the facets discussed so far, Aggregation functions (also called *face |missing |`missing(author)` |number of documents which do not have value for given field or function |countvals |`countvals(author)` |number of values for a given field or function |unique |`unique(author)` |number of unique values of the given field. Beyond 100 values it yields not exact estimate -|uniqueBlock |`uniqueBlock(\_root_)` |same as above with smaller footprint strictly for <>. The given field must be unique across blocks, and only singlevalued string fields are supported, docValues are recommended. +|uniqueBlock |`uniqueBlock(\_root_)` or `uniqueBlock($fldref)` where `fldref=_root_` |same as above with smaller footprint strictly for <>. The given field must be unique across blocks, and only singlevalued string fields are supported, docValues are recommended. +| |`uniqueBlock({!v=type:parent})` or `uniqueBlock({!v=$qryref})` where `qryref=type:parent` |same as above, but using bitset of the given query to aggregate hits. |hll |`hll(author)` |distributed cardinality estimate via hyper-log-log algorithm |percentile |`percentile(salary,50,75,99,99.9)` |Percentile estimates via t-digest algorithm. When sorting by this metric, the first percentile listed is used as the sort value. |sumsq |`sumsq(rent)` |sum of squares of field or function @@ -875,6 +876,7 @@ color: { limit: -1, facet: { productsCount: "uniqueBlock(_root_)" + // or "uniqueBlock({!v=type:product})" } } ----