Skip to content

Commit

Permalink
Unit tests for exorciseIndex()
Browse files Browse the repository at this point in the history
  • Loading branch information
gokai committed Dec 3, 2023
1 parent 036d2a0 commit 1f24f3f
Showing 1 changed file with 69 additions and 0 deletions.
69 changes: 69 additions & 0 deletions lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -294,4 +294,73 @@ public void testPriorBrokenCommitPoint() throws Exception {
}
}
}

/**
 * Checks that {@link CheckIndex#exorciseIndex} lets a deliberately corrupted index pass a
 * subsequent {@link CheckIndex#checkIndex()} run.
 *
 * <p>The test writes three commits (one document each, merging disabled), confirms the index is
 * clean, then deletes {@code _0.cfs} and {@code _1.si} to damage the first two segments. It
 * asserts that the index is reported as not clean, exorcises it, and finally asserts that a fresh
 * check reports a clean index.
 *
 * @throws IOException if the directory, writer or checker fails on I/O
 */
public void testExorciseIndex() throws IOException {
  try (MockDirectoryWrapper dir = newMockDirectory()) {

    // Disable this normally useful test infra feature since this test intentionally leaves
    // broken indices behind:
    dir.setCheckIndexOnClose(false);

    IndexWriterConfig iwc =
        new IndexWriterConfig()
            .setMergePolicy(NoMergePolicy.INSTANCE)
            .setIndexDeletionPolicy(DeleteNothingIndexDeletionPolicy.INSTANCE);

    try (IndexWriter iw = new IndexWriter(dir, iwc)) {
      // Create 3 segments by creating 3 separate commit points:
      //   - segment 0 will be corrupted by deleting its compound file (_0.cfs)
      //   - segment 1 will be corrupted by deleting its segment info (_1.si) (LUCENE-7820)
      //   - segment 2 is left intact and should remain readable after the exorcism
      // exorciseIndex() should be able to exorcise all corrupt segments and rewrite a new
      // segments file.
      // TODO: Should we test exorciseIndex() for other corruptions like missing
      // term dictionary (.tis) / frequencies (.frq) / field data (.fdt) / positions (.prx) ?
      //
      // NOTE: we are (illegally) relying on precise file naming here -- if Codec or IW's
      // behaviour changes, this may need fixing.
      Document doc = new Document();
      doc.add(new StringField("id", "a", Field.Store.NO));

      // First segment: its compound file is the one deleted below.
      iw.addDocument(doc);
      iw.commit();
      assertTrue(slowFileExists(dir, "_0.cfs"));

      // Second segment: its segment info file is the one deleted below.
      iw.addDocument(doc);
      iw.commit();
      assertTrue(slowFileExists(dir, "_1.si"));

      // Third segment: never touched by this test.
      iw.addDocument(doc);
      iw.commit();
      assertTrue(slowFileExists(dir, "_2.si"));
    }

    // Sanity check: the index must be clean before any file is removed.
    try (CheckIndex checker = new CheckIndex(dir)) {
      CheckIndex.Status checkIndexStatus = checker.checkIndex();
      assertTrue(checkIndexStatus.clean);
    }

    // Corrupt segment 0 by removing its compound file (_0.cfs).
    dir.deleteFile("_0.cfs");

    // Corrupt segment 1 by removing its segment info file (_1.si).
    // Only works after addressing LUCENE-7820.
    dir.deleteFile("_1.si");

    // The index should not be clean after the corruptions.
    try (CheckIndex checker = new CheckIndex(dir)) {
      CheckIndex.Status checkIndexStatus = checker.checkIndex();
      assertFalse(checkIndexStatus.clean);
      checker.exorciseIndex(checkIndexStatus);
      // Note that exorciseIndex does not remove any of the unreferenced files after it's done;
      // you must separately open an IndexWriter, which deletes unreferenced files when it's
      // created.
      assertFalse(checkIndexStatus.clean); // not clean yet until we check again!
    }

    // A fresh check after the exorcism should report a clean index.
    try (CheckIndex checker = new CheckIndex(dir)) {
      CheckIndex.Status checkIndexStatus = checker.checkIndex();
      assertTrue(checkIndexStatus.clean);
    }
  }
}
}

0 comments on commit 1f24f3f

Please sign in to comment.