Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XWIKI-22635: On restart, documents to re-index aren't correctly computed #3622

Merged
merged 2 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,16 @@
*/
package org.xwiki.search.solr.internal.job;

import java.util.Comparator;
import java.util.Objects;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.xwiki.model.internal.reference.comparator.DocumentReferenceComparator;
import org.xwiki.model.internal.reference.DefaultSymbolScheme;
import org.xwiki.model.internal.reference.LocalStringEntityReferenceSerializer;
import org.xwiki.model.reference.DocumentReference;
import org.xwiki.model.reference.EntityReference;
import org.xwiki.model.reference.EntityReferenceSerializer;

/**
* Compares the list of document references from two document iterators.
Expand Down Expand Up @@ -65,7 +70,7 @@ public enum Action
/**
* Used to compare document references.
*/
private final DocumentReferenceComparator documentReferenceComparator = new DocumentReferenceComparator();
private final Comparator<DocumentReference> documentReferenceComparator = getComparator();

/**
* The last entry taken from the {@link #previous} iterator.
Expand Down Expand Up @@ -147,6 +152,22 @@ public Pair<DocumentReference, Action> next()
return new ImmutablePair<DocumentReference, Action>(documentReference, action);
}

/**
* Get the comparator used for comparing document references. This method is public for testing purposes.
*
* @return the comparator for comparing document references
*/
public static Comparator<DocumentReference> getComparator()
{
EntityReferenceSerializer<String> localEntityReferenceSerializer =
new LocalStringEntityReferenceSerializer(new DefaultSymbolScheme());
return Comparator.comparing(DocumentReference::getWikiReference)
// Compare by the whole space as string as this is also what we compare in the database and in Solr.
.thenComparing(documentReference -> localEntityReferenceSerializer.serialize(documentReference.getParent()))
.thenComparing(DocumentReference::getName)
.thenComparing(documentReference -> Objects.toString(documentReference.getLocale(), ""));
}

@Override
public long size()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.xwiki.model.reference.SpaceReference;
import org.xwiki.rendering.syntax.Syntax;
import org.xwiki.repository.test.SolrTestUtils;
import org.xwiki.search.solr.internal.job.DiffDocumentIterator;
import org.xwiki.test.docker.junit5.TestConfiguration;
import org.xwiki.test.docker.junit5.TestReference;
import org.xwiki.test.docker.junit5.UITest;
Expand All @@ -38,6 +39,7 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.startsWith;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
* Integration tests for the Solr indexer, verifying re-indexing works as expected.
Expand All @@ -62,6 +64,7 @@ class SolrIndexerIT
import org.xwiki.component.util.DefaultParameterizedType
import org.xwiki.model.reference.DocumentReference
import org.xwiki.search.solr.internal.job.DatabaseDocumentIterator
import org.xwiki.search.solr.internal.job.DiffDocumentIterator
import org.xwiki.search.solr.internal.job.DocumentIterator
import org.xwiki.velocity.tools.JSONTool

Expand All @@ -71,33 +74,52 @@ class SolrIndexerIT
DocumentIterator<String> databaseIterator = services.component.getInstance(documentIterator, "database")
DocumentIterator<String> solrIterator = services.component.getInstance(documentIterator, "solr")

// Convert both iterators to lists and return them as JSON.
// Set the content type to text/plain to not trigger the JSON UI of the browser.
response.setContentType("text/plain")
response.setCharacterEncoding("UTF-8")
def output = (new JSONTool()).serialize([
// Store both iterators converted to list for the output.
def outputData = [
"database": toList(databaseIterator),
"solr": toList(solrIterator)
])
]

response.writer.print(output)
response.setContentLength(output.getBytes("UTF-8").size())
response.flushBuffer()
xcontext.setFinished(true)
}
// Re-create both iterators and use a diff iterator to verify that the diff computation works.
databaseIterator = services.component.getInstance(documentIterator, "database")
solrIterator = services.component.getInstance(documentIterator, "solr")

DocumentIterator<DiffDocumentIterator.Action> diffIterator =
new DiffDocumentIterator(solrIterator, databaseIterator)
outputData.put("diff", toList(diffIterator))

// Set the content type to text/plain to not trigger the JSON UI of the browser.
response.setContentType("text/plain")
response.setCharacterEncoding("UTF-8")
def output = (new JSONTool()).serialize(outputData)

response.writer.print(output)
response.setContentLength(output.getBytes("UTF-8").size())
response.flushBuffer()
xcontext.setFinished(true)
}

// Method to convert an iterator of pairs to a list of two-element lists.
static def toList(Iterator<Pair<DocumentReference, String>> iterator) {
static def toList(Iterator<Pair<DocumentReference, ?>> iterator) {
def list = []
Pair<DocumentReference, ?> previous = null
def comparator = DiffDocumentIterator.getComparator()
while (iterator.hasNext()) {
def pair = iterator.next()
list.add([pair.getLeft().toString(), pair.getRight()])
int comparison = -1
if (previous != null) {
comparison = comparator.compare(previous.getLeft(), pair.getLeft())
}
previous = pair
list.add([pair.getLeft().toString(), pair.getRight().toString(), String.valueOf(comparison)])
}
return list
}
// {{/groovy}}
""";

private static final String WEB_HOME = "WebHome";

@Test
void sortOrder(TestReference testReference, TestUtils testUtils, TestConfiguration testConfiguration)
throws Exception
Expand All @@ -115,10 +137,17 @@ void sortOrder(TestReference testReference, TestUtils testUtils, TestConfigurati
DocumentReference pageReference = new DocumentReference(name, testSpace);
testUtils.rest().savePage(pageReference, "Terminal page content", name);
SpaceReference spaceReference = new SpaceReference(name, testSpace);
DocumentReference nonTerminalPageReference = new DocumentReference("WebHome", spaceReference);
DocumentReference nonTerminalPageReference = new DocumentReference(WEB_HOME, spaceReference);
testUtils.rest().savePage(nonTerminalPageReference, "Non-terminal page content", name);
}

// Test confusion between space separator and similar strings inside the space.
testUtils.rest().savePage(new DocumentReference(WEB_HOME, new SpaceReference("A.b", testSpace)));
testUtils.rest().savePage(new DocumentReference(WEB_HOME,
new SpaceReference("b", new SpaceReference("A", testSpace))));
testUtils.rest().savePage(new DocumentReference(WEB_HOME, new SpaceReference("A-b", testSpace)));
testUtils.rest().savePage(new DocumentReference(WEB_HOME, new SpaceReference("AAb", testSpace)));

new SolrTestUtils(testUtils, testConfiguration.getServletEngine()).waitEmptyQueue();

// Get the output from the test script.
Expand All @@ -131,6 +160,30 @@ void sortOrder(TestReference testReference, TestUtils testUtils, TestConfigurati
{
});

assertEquals(iteratorOutput.get("solr"), iteratorOutput.get("database"));
String skipAction = DiffDocumentIterator.Action.SKIP.toString();
List<List<String>> nonSkipOperations = iteratorOutput.get("diff").stream()
.filter(item -> !skipAction.equals(item.get(1)))
.toList();
List<List<String>> databaseDocuments = iteratorOutput.get("database");
List<List<String>> solrDocuments = iteratorOutput.get("solr");

assertTrue(nonSkipOperations.isEmpty(), """
The list of operations contains non-skip operations: %s.
Documents in Solr: %s
Documents in the database: %s
""".formatted(nonSkipOperations, solrDocuments, databaseDocuments)
);

// Just to be sure, also explicitly compare the two lists of documents even though the diff iterator should
// already have done that job.
assertEquals(solrDocuments, databaseDocuments);

List<List<String>> wronglyOrderedItems =
solrDocuments.stream().filter(item -> Integer.parseInt(item.get(2)) >= 0).toList();
assertTrue(wronglyOrderedItems.isEmpty(), """
Some documents aren't "larger" than the documents before them: %s
All documents: %s
""".formatted(wronglyOrderedItems, solrDocuments)
);
}
}
Loading