diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java index 16d73eea97c0..028e2b8f59ba 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java @@ -294,4 +294,73 @@ public void testPriorBrokenCommitPoint() throws Exception { } } } + + public void testExorciseIndex() throws IOException { + try (MockDirectoryWrapper dir = newMockDirectory()) { + + // disable this normally useful test infra feature since this test intentionally leaves broken + // indices: + dir.setCheckIndexOnClose(false); + dir.listAll(); + IndexWriterConfig iwc = + new IndexWriterConfig() + .setMergePolicy(NoMergePolicy.INSTANCE) + .setIndexDeletionPolicy(DeleteNothingIndexDeletionPolicy.INSTANCE); + + try (IndexWriter iw = new IndexWriter(dir, iwc)) { + // create 3 segments by creating 3 separate commit points + // We will corrupt the first segment by deleting its compound file (_0.cfs) + // We will corrupt the second segment by deleting its segment info (_1.si) (LUCENE-7820) + // TODO: Should we test exorciseIndex() for other corruptions like missing + // term dictionary (.tis) / frequencies (.frq) / field data (.fdt) / positions (.prx) ? + // exorciseIndex() should be able to exorcise all corrupt segments and rewrite a new + // segments file + Document doc = new Document(); + doc.add(new StringField("id", "a", Field.Store.NO)); + iw.addDocument(doc); + iw.commit(); + // NOTE: we are (illegally) relying on precise file naming here -- if Codec or IW's + // behaviour changes, this may need fixing: + // We will corrupt the first segment by deleting its compound file (_0.cfs) + assertTrue(slowFileExists(dir, "_0.cfs")); + iw.addDocument(doc); + iw.commit(); + // We will corrupt the first segment by deleting its segment info (_1.si) + assertTrue(slowFileExists(dir, "_1.si")); + iw.addDocument(doc); + iw.commit(); + // We should be able to read the third segment once the corrupt segments are exorcised + assertTrue(slowFileExists(dir, "_2.si")); + } + + try (CheckIndex checkers = new CheckIndex(dir)) { + CheckIndex.Status checkIndexStatus = checkers.checkIndex(); + assertTrue(checkIndexStatus.clean); + } + + // now corrupt the segment 0 by removing its compound file (_0.cfs) + dir.deleteFile("_0.cfs"); + + // now corrupt the segment 1 by removing its segment info file (_0.si) + // Only works after addressing LUCENE-7820 + dir.deleteFile("_1.si"); + + // Index should not be clean after corruptions + try (CheckIndex checkers = new CheckIndex(dir)) { + CheckIndex.Status checkIndexStatus = checkers.checkIndex(); + assertFalse(checkIndexStatus.clean); + checkers.exorciseIndex(checkIndexStatus); + // Note that exorciseIndex does not remove any of the unreferenced files after it's done; + // you must separately open an {@link IndexWriter}, which deletes unreferenced files when + // it's created. + assertFalse(checkIndexStatus.clean); // not clean yet until we check again! + } + + // Checking for clean index after exorcism! + try (CheckIndex checkers = new CheckIndex(dir)) { + CheckIndex.Status checkIndexStatus = checkers.checkIndex(); + assertTrue(checkIndexStatus.clean); + } + } + } }