Skip to content

Commit

Permalink
[Upgrade] Lucene 9.0.0 release (#1109)
Browse files Browse the repository at this point in the history
This commit upgrades the core codebase from Lucene 8.10.1 to
Lucene 9.0.0. It includes all necessary refactoring of features and
API changes when upgrading to a new major Lucene release.

Signed-off-by: Nicholas Walter Knize <[email protected]>
Co-authored-by: Andriy Redko <[email protected]>
  • Loading branch information
nknize and reta authored Mar 15, 2022
1 parent 757abdb commit 006c832
Show file tree
Hide file tree
Showing 274 changed files with 3,052 additions and 980 deletions.
5 changes: 4 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,10 @@ tasks.register("branchConsistency") {
allprojects {
// configure compiler options
tasks.withType(JavaCompile).configureEach { JavaCompile compile ->
compile.options.compilerArgs << '-Werror'
// Please see https://bugs.openjdk.java.net/browse/JDK-8209058
if (BuildParams.runtimeJavaVersion > JavaVersion.VERSION_11) {
compile.options.compilerArgs << '-Werror'
}
compile.options.compilerArgs << '-Xlint:auxiliaryclass'
compile.options.compilerArgs << '-Xlint:cast'
compile.options.compilerArgs << '-Xlint:classfile'
Expand Down
2 changes: 1 addition & 1 deletion buildSrc/src/main/resources/minimumRuntimeVersion
Original file line number Diff line number Diff line change
@@ -1 +1 @@
11
11
4 changes: 2 additions & 2 deletions buildSrc/version.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
opensearch = 2.0.0
lucene = 8.10.1
lucene = 9.0.0

bundled_jdk_vendor = adoptium
bundled_jdk = 17.0.2+8
Expand All @@ -11,7 +11,7 @@ spatial4j = 0.7
jts = 1.15.0
jackson = 2.12.6
snakeyaml = 1.26
icu4j = 62.1
icu4j = 68.2
supercsv = 2.4.0
log4j = 2.17.1
slf4j = 1.6.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@

package org.opensearch.common.settings;

import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NIOFSDirectory;
Expand Down Expand Up @@ -328,13 +330,14 @@ private void possiblyAlterEncryptedBytes(
byte[] encryptedBytes,
int truncEncryptedDataLength
) throws Exception {
indexOutput.writeInt(4 + salt.length + 4 + iv.length + 4 + encryptedBytes.length);
indexOutput.writeInt(salt.length);
indexOutput.writeBytes(salt, salt.length);
indexOutput.writeInt(iv.length);
indexOutput.writeBytes(iv, iv.length);
indexOutput.writeInt(encryptedBytes.length - truncEncryptedDataLength);
indexOutput.writeBytes(encryptedBytes, encryptedBytes.length);
DataOutput io = EndiannessReverserUtil.wrapDataOutput(indexOutput);
io.writeInt(4 + salt.length + 4 + iv.length + 4 + encryptedBytes.length);
io.writeInt(salt.length);
io.writeBytes(salt, salt.length);
io.writeInt(iv.length);
io.writeBytes(iv, iv.length);
io.writeInt(encryptedBytes.length - truncEncryptedDataLength);
io.writeBytes(encryptedBytes, encryptedBytes.length);
}

public void testUpgradeAddsSeed() throws Exception {
Expand Down Expand Up @@ -363,7 +366,7 @@ public void testBackcompatV1() throws Exception {
assumeFalse("Can't run in a FIPS JVM as PBE is not available", inFipsJvm());
Path configDir = env.configFile();
NIOFSDirectory directory = new NIOFSDirectory(configDir);
try (IndexOutput output = directory.createOutput("opensearch.keystore", IOContext.DEFAULT)) {
try (IndexOutput output = EndiannessReverserUtil.createOutput(directory, "opensearch.keystore", IOContext.DEFAULT)) {
CodecUtil.writeHeader(output, "opensearch.keystore", 1);
output.writeByte((byte) 0); // hasPassword = false
output.writeString("PKCS12");
Expand Down Expand Up @@ -396,7 +399,7 @@ public void testBackcompatV2() throws Exception {
NIOFSDirectory directory = new NIOFSDirectory(configDir);
byte[] fileBytes = new byte[20];
random().nextBytes(fileBytes);
try (IndexOutput output = directory.createOutput("opensearch.keystore", IOContext.DEFAULT)) {
try (IndexOutput output = EndiannessReverserUtil.createOutput(directory, "opensearch.keystore", IOContext.DEFAULT)) {

CodecUtil.writeHeader(output, "opensearch.keystore", 2);
output.writeByte((byte) 0); // hasPassword = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
package org.opensearch.analysis.common;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.classic.ClassicFilter;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
package org.opensearch.analysis.common;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.classic.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
import org.apache.lucene.analysis.classic.ClassicFilter;
import org.apache.lucene.analysis.classic.ClassicTokenizer;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.core.KeywordTokenizer;
Expand All @@ -64,6 +66,7 @@
import org.apache.lucene.analysis.de.GermanNormalizationFilter;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.email.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
Expand Down Expand Up @@ -113,10 +116,7 @@
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.th.ThaiTokenizer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,18 @@ public TokenStream create(TokenStream tokenStream) {

/**
 * Copies the token filter settings into a fresh map keyed by camelCase argument names.
 *
 * <p>Each snake_case setting ({@code hash_count}, {@code bucket_count}, {@code hash_set_size},
 * {@code with_rotation}) is copied to its camelCase counterpart ({@code hashCount},
 * {@code bucketCount}, {@code hashSetSize}, {@code withRotation}) only when the setting
 * has a value, so the returned map never carries {@code null} entries for unset options.
 *
 * <p>NOTE(review): the camelCase names are presumably the argument names expected by the
 * Lucene filter factory that consumes this map; confirm against the caller.
 *
 * @param settings the settings to read the snake_case options from
 * @return a new mutable map containing only the options that were explicitly set
 */
private Map<String, String> convertSettings(Settings settings) {
    Map<String, String> settingMap = new HashMap<>();
    // The presence check must use the same snake_case key that is read below; checking the
    // camelCase output name would never match and the configured value would be dropped.
    if (settings.hasValue("hash_count")) {
        settingMap.put("hashCount", settings.get("hash_count"));
    }
    if (settings.hasValue("bucket_count")) {
        settingMap.put("bucketCount", settings.get("bucket_count"));
    }
    if (settings.hasValue("hash_set_size")) {
        settingMap.put("hashSetSize", settings.get("hash_set_size"));
    }
    if (settings.hasValue("with_rotation")) {
        settingMap.put("withRotation", settings.get("with_rotation"));
    }
    return settingMap;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.email.UAX29URLEmailTokenizer;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,15 @@ protected Map<String, Class<?>> getTokenFilters() {
filters.put("latvianstem", StemmerTokenFilterFactory.class);
filters.put("norwegianlightstem", StemmerTokenFilterFactory.class);
filters.put("norwegianminimalstem", StemmerTokenFilterFactory.class);
filters.put("norwegiannormalization", Void.class);
filters.put("portuguesestem", StemmerTokenFilterFactory.class);
filters.put("portugueselightstem", StemmerTokenFilterFactory.class);
filters.put("portugueseminimalstem", StemmerTokenFilterFactory.class);
filters.put("russianlightstem", StemmerTokenFilterFactory.class);
filters.put("soranistem", StemmerTokenFilterFactory.class);
filters.put("spanishlightstem", StemmerTokenFilterFactory.class);
filters.put("swedishlightstem", StemmerTokenFilterFactory.class);
filters.put("swedishminimalstem", Void.class);
filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
filters.put("kstem", KStemTokenFilterFactory.class);
filters.put("synonym", SynonymTokenFilterFactory.class);
Expand Down Expand Up @@ -242,7 +244,7 @@ protected Map<String, Class<?>> getPreConfiguredTokenizers() {
tokenizers.put("keyword", null);
tokenizers.put("lowercase", Void.class);
tokenizers.put("classic", null);
tokenizers.put("uax_url_email", org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.class);
tokenizers.put("uax_url_email", org.apache.lucene.analysis.email.UAX29URLEmailTokenizerFactory.class);
tokenizers.put("path_hierarchy", null);
tokenizers.put("letter", null);
tokenizers.put("whitespace", null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,15 @@ public void setup() {
// parsed queries for "text_shingle_unigram:(foo bar baz)" with query parsers
// that ignores position length attribute
expectedQueryWithUnigram = new BooleanQuery.Builder().add(
new SynonymQuery(new Term("text_shingle_unigram", "foo"), new Term("text_shingle_unigram", "foo bar")),
new SynonymQuery.Builder("text_shingle_unigram").addTerm(new Term("text_shingle_unigram", "foo"))
.addTerm(new Term("text_shingle_unigram", "foo bar"))
.build(),
BooleanClause.Occur.SHOULD
)
.add(
new SynonymQuery(new Term("text_shingle_unigram", "bar"), new Term("text_shingle_unigram", "bar baz")),
new SynonymQuery.Builder("text_shingle_unigram").addTerm(new Term("text_shingle_unigram", "bar"))
.addTerm(new Term("text_shingle_unigram", "bar baz"))
.build(),
BooleanClause.Occur.SHOULD
)
.add(new TermQuery(new Term("text_shingle_unigram", "baz")), BooleanClause.Occur.SHOULD)
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0a3d818d6f6fb113831ed34553b24763fbda1e84
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.expressions.js.VariableContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.SortField;
import org.opensearch.SpecialPermission;
import org.opensearch.common.Nullable;
import org.opensearch.index.fielddata.IndexFieldData;
Expand Down Expand Up @@ -263,7 +262,7 @@ private static NumberSortScript.LeafFactory newSortScript(Expression expr, Searc
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (vars != null && vars.containsKey(variable)) {
bindFromParams(vars, bindings, variable);
Expand Down Expand Up @@ -320,7 +319,7 @@ private static AggregationScript.LeafFactory newAggregationScript(
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (variable.equals("_value")) {
specialValue = new ReplaceableConstDoubleValueSource();
Expand Down Expand Up @@ -393,7 +392,7 @@ private static ScoreScript.LeafFactory newScoreScript(Expression expr, SearchLoo
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (variable.equals("_value")) {
specialValue = new ReplaceableConstDoubleValueSource();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ grant {
permission org.opensearch.script.ClassPermission "java.lang.Math";
permission org.opensearch.script.ClassPermission "org.apache.lucene.util.MathUtil";
permission org.opensearch.script.ClassPermission "org.apache.lucene.util.SloppyMath";
permission org.opensearch.script.ClassPermission "org.apache.lucene.expressions.js.ExpressionMath";
};
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
Expand All @@ -52,10 +56,6 @@
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
Expand All @@ -47,9 +50,6 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.opensearch.common.Strings;
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.opensearch.common.xcontent.XContentBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.similarities.Similarity;
Expand Down Expand Up @@ -409,6 +410,11 @@ public static final class LateParsingQuery extends Query {
this.similarity = similarity;
}

/**
 * Accepts a {@link QueryVisitor} and reports this query to it as a leaf.
 *
 * <p>The only interaction with the visitor is a single {@code visitLeaf(this)} call;
 * this method passes nothing else to the visitor.
 *
 * @param visitor the visitor to notify about this query
 */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}

@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = super.rewrite(reader);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
Expand All @@ -56,7 +56,6 @@
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.Set;

final class PercolateQuery extends Query implements Accountable {

Expand Down Expand Up @@ -112,8 +111,6 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
final Weight verifiedMatchesWeight = verifiedMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
final Weight candidateMatchesWeight = candidateMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
return new Weight(this) {
@Override
public void extractTerms(Set<Term> set) {}

@Override
public Explanation explain(LeafReaderContext leafReaderContext, int docId) throws IOException {
Expand Down Expand Up @@ -245,6 +242,11 @@ Query getVerifiedMatchesQuery() {
return verifiedMatchesQuery;
}

/**
 * Accepts a {@link QueryVisitor} and reports this query to it as a single leaf.
 *
 * <p>Only {@code visitLeaf(this)} is invoked; the candidate and verified matches
 * queries held by this query are not handed to the visitor by this method.
 *
 * @param visitor the visitor to notify about this query
 */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}

// Comparing identity here to avoid being cached
// Note that in theory if the same instance gets used multiple times it could still get cached,
// however since we create a new query instance each time we use this query this shouldn't happen and thus
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.sandbox.search.CoveringQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchNoDocsQuery;
Expand Down Expand Up @@ -279,7 +279,7 @@ Query percolateQuery(
}
Query filter = null;
if (excludeNestedDocuments) {
filter = Queries.newNonNestedFilter(indexVersion);
filter = Queries.newNonNestedFilter();
}
return new PercolateQuery(name, queryStore, documents, candidateQuery, searcher, filter, verifiedMatchesQuery);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.opensearch.Version;
import org.opensearch.common.document.DocumentField;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.search.fetch.FetchContext;
Expand Down Expand Up @@ -127,7 +126,7 @@ static class PercolateContext {
this.percolateQuery = pq;
this.singlePercolateQuery = singlePercolateQuery;
IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher();
Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(Version.CURRENT));
Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter());
Weight weight = percolatorIndexSearcher.createWeight(nonNestedFilter, ScoreMode.COMPLETE_NO_SCORES, 1f);
Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0));
int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();
Expand All @@ -148,7 +147,7 @@ Query filterNestedDocs(Query in) {
if (rootDocsBySlot != null) {
// Ensures that we filter out nested documents
return new BooleanQuery.Builder().add(in, BooleanClause.Occur.MUST)
.add(Queries.newNonNestedFilter(Version.CURRENT), BooleanClause.Occur.FILTER)
.add(Queries.newNonNestedFilter(), BooleanClause.Occur.FILTER)
.build();
}
return in;
Expand Down
Loading

0 comments on commit 006c832

Please sign in to comment.