Skip to content

Commit

Permalink
SOLR-16555: SolrIndexSearcher - FilterCache intersections/andNot shou…
Browse files Browse the repository at this point in the history
…ld not clone bitsets repeatedly (apache#1184)

Co-authored-by: David Smiley <[email protected]>
  • Loading branch information
risdenk and dsmiley authored Nov 22, 2022
1 parent f4d8e6e commit 30bf94d
Show file tree
Hide file tree
Showing 4 changed files with 218 additions and 41 deletions.
2 changes: 2 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ Optimizations

* SOLR-16515: Remove synchronized access to cachedOrdMaps in SlowCompositeReaderWrapper (Dennis Berger, Torsten Bøgh Köster, Marco Petris)

* SOLR-16555: SolrIndexSearcher - FilterCache intersections/andNot should not clone bitsets repeatedly (Kevin Risden, David Smiley)

Bug Fixes
---------------------

Expand Down
22 changes: 17 additions & 5 deletions solr/core/src/java/org/apache/solr/search/BitDocSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -194,19 +194,31 @@ public void addAllTo(FixedBitSet target) {

@Override
public DocSet andNot(DocSet other) {
FixedBitSet newbits = bits.clone();
FixedBitSet newbits = getFixedBitSetClone();
andNot(newbits, other);
return new BitDocSet(newbits);
}

/**
* Helper method for andNot that takes FixedBitSet and DocSet. This modifies the provided
* FixedBitSet to remove all bits contained in the DocSet argument -- equivalent to calling
* a.andNot(b), but modifies the state of the FixedBitSet instead of returning a new FixedBitSet.
*
* @param bits FixedBitSet to operate on
* @param other The DocSet to compare to
*/
protected static void andNot(FixedBitSet bits, DocSet other) {
if (other instanceof BitDocSet) {
newbits.andNot(((BitDocSet) other).bits);
bits.andNot(((BitDocSet) other).bits);
} else {
DocIterator iter = other.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
if (doc < newbits.length()) {
newbits.clear(doc);
if (doc < bits.length()) {
bits.clear(doc);
}
}
}
return new BitDocSet(newbits);
}

@Override
Expand Down
147 changes: 147 additions & 0 deletions solr/core/src/java/org/apache/solr/search/MutableBitDocSet.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;

import org.apache.lucene.util.FixedBitSet;

/**
* A {@link BitDocSet} based implementation that mutates the underlying bits for andNot and
* intersection. This allows for computing the combinations of sets without duplicating the
* underlying array. This MutableBitDocSet should not be cached because it can be modified.
*
* @since solr 9.2
*/
class MutableBitDocSet extends BitDocSet {
private MutableBitDocSet(FixedBitSet bits, int size) {
super(bits, size);
}

/**
* Returns a mutable BitDocSet that is a copy of the provided BitDocSet.
*
* @param bitDocSet a BitDocSet
* @return copy of bitDocSet that is now mutable
*/
public static MutableBitDocSet fromBitDocSet(BitDocSet bitDocSet) {
// don't call size() since we just want to pass through the size
// instead of computing it if it was already computed
return new MutableBitDocSet(bitDocSet.getFixedBitSetClone(), bitDocSet.size);
}

/**
* Returns a new BitDocSet with the same bits if the DocSet provided is a MutableBitDocSet.
* Otherwise, just returns the provided DocSet.
*
* @param docSet DocSet to unwrap if it is a MutableBitDocSet
* @return Unwrapped DocSet that is not mutable
*/
public static DocSet unwrapIfMutable(DocSet docSet) {
if (docSet instanceof MutableBitDocSet) {
MutableBitDocSet mutableBitDocSet = (MutableBitDocSet) docSet;
// don't call size() since we just want to pass through the size
// instead of computing it if it was already computed
return new BitDocSet(mutableBitDocSet.getBits(), mutableBitDocSet.size);
}
return docSet;
}

private void resetSize() {
// We need to reset size since we are changing the cardinality of the underlying bits and size
// is typically cached. This forces size to be recomputed as needed.
this.size = -1;
}

@Override
public DocIterator iterator() {
throw new UnsupportedOperationException();
}

/**
* Returns the intersection of this set with another set. This mutates the underlying bits so do
* not cache the returned bitset.
*
* @return a DocSet representing the intersection
*/
@Override
public DocSet intersection(DocSet other) {
// intersection is overloaded in the smaller DocSets to be more
// efficient, so dispatch off of it instead.
if (!(other instanceof BitDocSet)) {
return other.intersection(this);
}

// Operates directly on the underlying bitsets.
FixedBitSet newbits = getFixedBitSet();
newbits.and(other.getFixedBitSet());

resetSize();
return this;
}

@Override
public int intersectionSize(DocSet other) {
throw new UnsupportedOperationException();
}

@Override
public boolean intersects(DocSet other) {
throw new UnsupportedOperationException();
}

@Override
public int unionSize(DocSet other) {
throw new UnsupportedOperationException();
}

@Override
public int andNotSize(DocSet other) {
throw new UnsupportedOperationException();
}

@Override
public void addAllTo(FixedBitSet target) {
throw new UnsupportedOperationException();
}

/**
* Returns the documents in this set that are not in the other set. This mutates the underlying
* bits so do not cache the returned bitset.
*
* @return a DocSet representing this AND NOT other
*/
@Override
public DocSet andNot(DocSet other) {
andNot(getFixedBitSet(), other);
resetSize();
return this;
}

@Override
public DocSet union(DocSet other) {
throw new UnsupportedOperationException();
}

@Override
public BitDocSet clone() {
throw new UnsupportedOperationException();
}

@Override
public String toString() {
return "MutableBitDocSet instance of " + super.toString();
}
}
88 changes: 52 additions & 36 deletions solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -954,21 +954,26 @@ private DocSet getAndCacheDocSet(Query query) throws IOException {
return getLiveDocSet();
}

DocSet answer;
if (SolrQueryTimeoutImpl.getInstance().isTimeoutEnabled()) {
// If there is a possibility of timeout for this query, then don't reserve a computation slot.
// Further, we can't naively wait for an in progress computation to finish, because if we time
// out before it does then we won't even have partial results to provide. We could possibly
// wait for the query to finish in parallel with our own results and if they complete first
// use that instead, but we'll leave that to implement later.
DocSet answer = filterCache.get(query);
if (answer != null) {
return answer;
answer = filterCache.get(query);

// Not found in the cache so compute and put in the cache
if (answer == null) {
answer = getDocSetNC(query, null);
filterCache.put(query, answer);
}
answer = getDocSetNC(query, null);
filterCache.put(query, answer);
return answer;
} else {
answer = filterCache.computeIfAbsent(query, q -> getDocSetNC(q, null));
}
return filterCache.computeIfAbsent(query, q -> getDocSetNC(q, null));

assert !(answer instanceof MutableBitDocSet) : "should not be mutable";
return answer;
}

private static final MatchAllDocsQuery MATCH_ALL_DOCS_QUERY = new MatchAllDocsQuery();
Expand Down Expand Up @@ -1151,20 +1156,17 @@ public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries)
// This might become pf.answer but not if there are any non-cached filters
DocSet answer = null;

boolean[] neg = new boolean[queries.size() + 1];
DocSet[] sets = new DocSet[queries.size() + 1];
boolean[] neg = new boolean[queries.size()];
DocSet[] sets = new DocSet[queries.size()];
List<ExtendedQuery> notCached = null;
List<PostFilter> postFilters = null;

int end = 0;
int smallestIndex = -1;
int end = 0; // size of "sets" and "neg"; parallel arrays

if (setFilter != null) {
answer = sets[end++] = setFilter;
smallestIndex = end;
answer = setFilter;
} // we are done with setFilter at this point

int smallestCount = Integer.MAX_VALUE;
for (Query q : queries) {
if (q instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery) q;
Expand All @@ -1190,38 +1192,52 @@ public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries)
}

Query posQuery = QueryUtils.getAbs(q);
sets[end] = getPositiveDocSet(posQuery);
DocSet docSet = getPositiveDocSet(posQuery);
// Negative query if absolute value different from original
if (Objects.equals(q, posQuery)) {
neg[end] = false;
// keep track of the smallest positive set.
// This optimization is only worth it if size() is cached, which it would
// be if we don't do any set operations.
int sz = sets[end].size();
if (sz < smallestCount) {
smallestCount = sz;
smallestIndex = end;
answer = sets[end];
// keep track of the smallest positive set; use "answer" for this.
if (answer == null) {
answer = docSet;
continue;
}
// note: assume that size() is cached. It generally comes from the cache, so should be.
if (docSet.size() < answer.size()) {
// swap answer & docSet so that answer is smallest
DocSet tmp = answer;
answer = docSet;
docSet = tmp;
}
neg[end] = false;
} else {
neg[end] = true;
}
sets[end++] = docSet;
} // end of queries

end++;
}
if (end > 0) {
// Are all of our normal cached filters negative?
if (answer == null) {
answer = getLiveDocSet();
}

// Are all of our normal cached filters negative?
if (end > 0 && answer == null) {
answer = getLiveDocSet();
}
// This optimizes for the case where we have more than 2 filters and instead
// of copying the bitsets we make one mutable bitset. We should only do this
// for BitDocSet since it clones the backing bitset for andNot and intersection.
if (end > 1 && answer instanceof BitDocSet) {
answer = MutableBitDocSet.fromBitDocSet((BitDocSet) answer);
}

// do negative queries first to shrink set size
for (int i = 0; i < end; i++) {
if (neg[i]) answer = answer.andNot(sets[i]);
}
// do negative queries first to shrink set size
for (int i = 0; i < end; i++) {
if (neg[i]) answer = answer.andNot(sets[i]);
}

for (int i = 0; i < end; i++) {
if (!neg[i]) answer = answer.intersection(sets[i]);
}

for (int i = 0; i < end; i++) {
if (!neg[i] && i != smallestIndex) answer = answer.intersection(sets[i]);
// Make sure to keep answer as an immutable DocSet if we made it mutable
answer = MutableBitDocSet.unwrapIfMutable(answer);
}

// ignore "answer" if it simply matches all docs
Expand Down

0 comments on commit 30bf94d

Please sign in to comment.