Skip to content

Commit

Permalink
Beef up Terms#intersect checks in CheckIndex. (#12926)
Browse files Browse the repository at this point in the history
Now also testing what happens with a non-null `startTerm`. This found bugs in
`DirectPostingsFormat`.
  • Loading branch information
jpountz authored Dec 19, 2023
1 parent 5d6086e commit bf45ab7
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,10 @@ public DirectIntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm)

while (label > states[i].transitionMax) {
states[i].transitionUpto++;
assert states[i].transitionUpto < states[i].transitionCount;
if (states[i].transitionUpto >= states[i].transitionCount) {
// All transitions compare less than the required label
break;
}
transitionAccessor.getNextTransition(states[i].transition);
states[i].transitionMin = states[i].transition.min;
states[i].transitionMax = states[i].transition.max;
Expand Down Expand Up @@ -1119,12 +1122,14 @@ public DirectIntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm)
}
}

final int termOffset = termOffsets[termOrd];
final int termLen = termOffsets[1 + termOrd] - termOffset;
if (termOrd >= 0) {
final int termOffset = termOffsets[termOrd];
final int termLen = termOffsets[1 + termOrd] - termOffset;

if (termOrd >= 0 && !startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
stateUpto -= skipUpto;
termOrd--;
if (!startTerm.equals(new BytesRef(termBytes, termOffset, termLen))) {
stateUpto -= skipUpto;
termOrd--;
}
}
// if (DEBUG) {
// System.out.println(" loop end; return termOrd=" + termOrd + " stateUpto=" +
Expand Down
72 changes: 53 additions & 19 deletions lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -2305,31 +2305,26 @@ private static Status.TermIndexStatus checkFields(
}

// Test Terms#intersect
TermsEnum allTerms = terms.iterator();
// An automaton that should match a good number of terms
Automaton a =
Automaton automaton =
Operations.concatenate(
Arrays.asList(
Automata.makeAnyBinary(),
Automata.makeCharRange('a', 'e'),
Automata.makeAnyBinary()));
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
CompiledAutomaton ca = new CompiledAutomaton(a);
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(a);
TermsEnum filteredTerms = terms.intersect(ca, null);
for (BytesRef term = allTerms.next(); term != null; term = allTerms.next()) {
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
BytesRef filteredTerm = filteredTerms.next();
if (Objects.equals(term, filteredTerm) == false) {
throw new CheckIndexException(
"Expected next filtered term: " + term + ", but got " + filteredTerm);
}
}
}
BytesRef filteredTerm = filteredTerms.next();
if (filteredTerm != null) {
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
}
BytesRef startTerm = null;
checkTermsIntersect(terms, automaton, startTerm);

startTerm = new BytesRef();
checkTermsIntersect(terms, automaton, startTerm);

automaton = Automata.makeAnyBinary();
startTerm = new BytesRef(new byte[] {'l'});
checkTermsIntersect(terms, automaton, startTerm);

// a term that likely compares greater than every other term in the dictionary
startTerm = new BytesRef(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF});
checkTermsIntersect(terms, automaton, startTerm);
}
}

Expand Down Expand Up @@ -2370,6 +2365,45 @@ private static Status.TermIndexStatus checkFields(
return status;
}

/**
 * Cross-checks {@code terms.intersect(...)} against a plain iteration over the same terms.
 *
 * <p>The automaton is determinized, then a reference enum obtained from {@code terms.iterator()}
 * is walked from its starting position to the end. Every term accepted by a {@code
 * ByteRunAutomaton} built from the determinized automaton must be the next term returned by the
 * intersected enum, and the intersected enum must be exhausted once the reference enum is.
 *
 * @param terms the terms to check
 * @param automaton the automaton to intersect with; determinized here before use
 * @param startTerm the term handed to {@code intersect}, or {@code null} to compare from the first
 *     term; when the reference enum finds {@code startTerm} exactly, comparison starts at the
 *     term following it
 * @throws IOException if one of the underlying enums throws it
 * @throws CheckIndexException if the intersected enum disagrees with the reference iteration
 */
private static void checkTermsIntersect(Terms terms, Automaton automaton, BytesRef startTerm)
    throws IOException {
  TermsEnum referenceEnum = terms.iterator();
  Automaton dfa = Operations.determinize(automaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
  CompiledAutomaton compiled = new CompiledAutomaton(dfa);
  ByteRunAutomaton matcher = new ByteRunAutomaton(dfa);
  TermsEnum intersectedEnum = terms.intersect(compiled, startTerm);

  // Pick the first reference term to compare; stays null when there is nothing left to compare.
  BytesRef expected = null;
  if (startTerm == null) {
    expected = referenceEnum.next();
  } else {
    switch (referenceEnum.seekCeil(startTerm)) {
      case FOUND:
        // Exact hit on startTerm: comparison begins with the term after it.
        expected = referenceEnum.next();
        break;
      case NOT_FOUND:
        // No exact hit: compare from the enum's current term (presumably the ceiling term).
        expected = referenceEnum.term();
        break;
      case END:
      default:
        break;
    }
  }

  while (expected != null) {
    if (matcher.run(expected.bytes, expected.offset, expected.length)) {
      BytesRef actual = intersectedEnum.next();
      if (!Objects.equals(expected, actual)) {
        throw new CheckIndexException(
            "Expected next filtered term: " + expected + ", but got " + actual);
      }
    }
    expected = referenceEnum.next();
  }

  // The intersected enum must not return anything beyond the accepted reference terms.
  BytesRef leftover = intersectedEnum.next();
  if (leftover != null) {
    throw new CheckIndexException("Expected exhausted TermsEnum, but got " + leftover);
  }
}

/**
* For use in tests only.
*
Expand Down

0 comments on commit bf45ab7

Please sign in to comment.