Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Upgrade] Lucene 9.0.0 release #1109

Merged
merged 11 commits into from
Mar 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,10 @@ tasks.register("branchConsistency") {
allprojects {
// configure compiler options
tasks.withType(JavaCompile).configureEach { JavaCompile compile ->
compile.options.compilerArgs << '-Werror'
// See please https://bugs.openjdk.java.net/browse/JDK-8209058
if (BuildParams.runtimeJavaVersion > JavaVersion.VERSION_11) {
compile.options.compilerArgs << '-Werror'
}
compile.options.compilerArgs << '-Xlint:auxiliaryclass'
compile.options.compilerArgs << '-Xlint:cast'
compile.options.compilerArgs << '-Xlint:classfile'
Expand Down
2 changes: 1 addition & 1 deletion buildSrc/src/main/resources/minimumRuntimeVersion
Original file line number Diff line number Diff line change
@@ -1 +1 @@
11
11
4 changes: 2 additions & 2 deletions buildSrc/version.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
opensearch = 2.0.0
lucene = 8.10.1
lucene = 9.0.0

bundled_jdk_vendor = adoptium
bundled_jdk = 17.0.2+8
Expand All @@ -11,7 +11,7 @@ spatial4j = 0.7
jts = 1.15.0
jackson = 2.12.6
snakeyaml = 1.26
icu4j = 62.1
icu4j = 68.2
supercsv = 2.4.0
log4j = 2.17.1
slf4j = 1.6.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@

package org.opensearch.common.settings;

import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NIOFSDirectory;
Expand Down Expand Up @@ -328,13 +330,14 @@ private void possiblyAlterEncryptedBytes(
byte[] encryptedBytes,
int truncEncryptedDataLength
) throws Exception {
indexOutput.writeInt(4 + salt.length + 4 + iv.length + 4 + encryptedBytes.length);
indexOutput.writeInt(salt.length);
indexOutput.writeBytes(salt, salt.length);
indexOutput.writeInt(iv.length);
indexOutput.writeBytes(iv, iv.length);
indexOutput.writeInt(encryptedBytes.length - truncEncryptedDataLength);
indexOutput.writeBytes(encryptedBytes, encryptedBytes.length);
DataOutput io = EndiannessReverserUtil.wrapDataOutput(indexOutput);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nknize I am concerned that using EndiannessReverserUtil.wrapDataXxx would (potentially) generate temporary EndiannessReverserDataOutput / EndiannessReverserDataInput wrappers ("garbage"). Taking into account how often they are called, this may add unnecessary pressure on the GC. Wdyt?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this isn't ideal, but it's only used in the KeyStoreWrapperTests and not actually in any critical path.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For tests - totally agree, but Checkpoint also uses wrapping, https://github.com/opensearch-project/OpenSearch/pull/1109/files#diff-f916f3d153eb8dada3c8be868ae55d050b1f24ce02343cea642f12bb0b0b8635R158. Anyway, there is probably not much we can do about that :(

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, yes (I've been working on this PR for a while). Not much we can do about that since the Endianness change. Fortunately that's for BWC, so it should only be used during rolling upgrades; then the new version will take over.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:+1: thanks!

io.writeInt(4 + salt.length + 4 + iv.length + 4 + encryptedBytes.length);
io.writeInt(salt.length);
io.writeBytes(salt, salt.length);
io.writeInt(iv.length);
io.writeBytes(iv, iv.length);
io.writeInt(encryptedBytes.length - truncEncryptedDataLength);
io.writeBytes(encryptedBytes, encryptedBytes.length);
}

public void testUpgradeAddsSeed() throws Exception {
Expand Down Expand Up @@ -363,7 +366,7 @@ public void testBackcompatV1() throws Exception {
assumeFalse("Can't run in a FIPS JVM as PBE is not available", inFipsJvm());
Path configDir = env.configFile();
NIOFSDirectory directory = new NIOFSDirectory(configDir);
try (IndexOutput output = directory.createOutput("opensearch.keystore", IOContext.DEFAULT)) {
try (IndexOutput output = EndiannessReverserUtil.createOutput(directory, "opensearch.keystore", IOContext.DEFAULT)) {
CodecUtil.writeHeader(output, "opensearch.keystore", 1);
output.writeByte((byte) 0); // hasPassword = false
output.writeString("PKCS12");
Expand Down Expand Up @@ -396,7 +399,7 @@ public void testBackcompatV2() throws Exception {
NIOFSDirectory directory = new NIOFSDirectory(configDir);
byte[] fileBytes = new byte[20];
random().nextBytes(fileBytes);
try (IndexOutput output = directory.createOutput("opensearch.keystore", IOContext.DEFAULT)) {
try (IndexOutput output = EndiannessReverserUtil.createOutput(directory, "opensearch.keystore", IOContext.DEFAULT)) {

CodecUtil.writeHeader(output, "opensearch.keystore", 2);
output.writeByte((byte) 0); // hasPassword = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
package org.opensearch.analysis.common;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.classic.ClassicFilter;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
package org.opensearch.analysis.common;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.classic.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
import org.apache.lucene.analysis.classic.ClassicFilter;
import org.apache.lucene.analysis.classic.ClassicTokenizer;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.core.KeywordTokenizer;
Expand All @@ -64,6 +66,7 @@
import org.apache.lucene.analysis.de.GermanNormalizationFilter;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.email.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
Expand Down Expand Up @@ -113,10 +116,7 @@
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.th.ThaiTokenizer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,18 @@ public TokenStream create(TokenStream tokenStream) {

/**
 * Copies the MinHash settings that are explicitly configured into a new map,
 * renaming each snake_case setting name to its camelCase counterpart
 * ({@code hash_count} to {@code hashCount}, {@code bucket_count} to {@code bucketCount},
 * {@code hash_set_size} to {@code hashSetSize}, {@code with_rotation} to {@code withRotation}).
 *
 * <p>A setting without a value is left out of the map entirely, so the returned map never
 * contains a {@code null} value.
 *
 * @param settings the settings to read the four MinHash options from
 * @return a new mutable map holding only the options that have a value
 */
private Map<String, String> convertSettings(Settings settings) {
    Map<String, String> settingMap = new HashMap<>();
    // Each guard must test the same snake_case name that is read by the get() below it;
    // testing the camelCase output key would never match and silently drop the setting.
    if (settings.hasValue("hash_count")) {
        settingMap.put("hashCount", settings.get("hash_count"));
    }
    if (settings.hasValue("bucket_count")) {
        settingMap.put("bucketCount", settings.get("bucket_count"));
    }
    if (settings.hasValue("hash_set_size")) {
        settingMap.put("hashSetSize", settings.get("hash_set_size"));
    }
    if (settings.hasValue("with_rotation")) {
        settingMap.put("withRotation", settings.get("with_rotation"));
    }
    return settingMap;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.email.UAX29URLEmailTokenizer;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,15 @@ protected Map<String, Class<?>> getTokenFilters() {
filters.put("latvianstem", StemmerTokenFilterFactory.class);
filters.put("norwegianlightstem", StemmerTokenFilterFactory.class);
filters.put("norwegianminimalstem", StemmerTokenFilterFactory.class);
filters.put("norwegiannormalization", Void.class);
filters.put("portuguesestem", StemmerTokenFilterFactory.class);
filters.put("portugueselightstem", StemmerTokenFilterFactory.class);
filters.put("portugueseminimalstem", StemmerTokenFilterFactory.class);
filters.put("russianlightstem", StemmerTokenFilterFactory.class);
filters.put("soranistem", StemmerTokenFilterFactory.class);
filters.put("spanishlightstem", StemmerTokenFilterFactory.class);
filters.put("swedishlightstem", StemmerTokenFilterFactory.class);
filters.put("swedishminimalstem", Void.class);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could add SwedishMinimalStemFilter to StemmerTokenFilterFactory? Same for NorwegianNormalizationFilter?

filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
filters.put("kstem", KStemTokenFilterFactory.class);
filters.put("synonym", SynonymTokenFilterFactory.class);
Expand Down Expand Up @@ -242,7 +244,7 @@ protected Map<String, Class<?>> getPreConfiguredTokenizers() {
tokenizers.put("keyword", null);
tokenizers.put("lowercase", Void.class);
tokenizers.put("classic", null);
tokenizers.put("uax_url_email", org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.class);
tokenizers.put("uax_url_email", org.apache.lucene.analysis.email.UAX29URLEmailTokenizerFactory.class);
tokenizers.put("path_hierarchy", null);
tokenizers.put("letter", null);
tokenizers.put("whitespace", null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,15 @@ public void setup() {
// parsed queries for "text_shingle_unigram:(foo bar baz)" with query parsers
// that ignores position length attribute
expectedQueryWithUnigram = new BooleanQuery.Builder().add(
new SynonymQuery(new Term("text_shingle_unigram", "foo"), new Term("text_shingle_unigram", "foo bar")),
new SynonymQuery.Builder("text_shingle_unigram").addTerm(new Term("text_shingle_unigram", "foo"))
.addTerm(new Term("text_shingle_unigram", "foo bar"))
.build(),
BooleanClause.Occur.SHOULD
)
.add(
new SynonymQuery(new Term("text_shingle_unigram", "bar"), new Term("text_shingle_unigram", "bar baz")),
new SynonymQuery.Builder("text_shingle_unigram").addTerm(new Term("text_shingle_unigram", "bar"))
.addTerm(new Term("text_shingle_unigram", "bar baz"))
.build(),
BooleanClause.Occur.SHOULD
)
.add(new TermQuery(new Term("text_shingle_unigram", "baz")), BooleanClause.Occur.SHOULD)
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0a3d818d6f6fb113831ed34553b24763fbda1e84
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.expressions.js.VariableContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.SortField;
import org.opensearch.SpecialPermission;
import org.opensearch.common.Nullable;
import org.opensearch.index.fielddata.IndexFieldData;
Expand Down Expand Up @@ -263,7 +262,7 @@ private static NumberSortScript.LeafFactory newSortScript(Expression expr, Searc
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (vars != null && vars.containsKey(variable)) {
bindFromParams(vars, bindings, variable);
Expand Down Expand Up @@ -320,7 +319,7 @@ private static AggregationScript.LeafFactory newAggregationScript(
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (variable.equals("_value")) {
specialValue = new ReplaceableConstDoubleValueSource();
Expand Down Expand Up @@ -393,7 +392,7 @@ private static ScoreScript.LeafFactory newScoreScript(Expression expr, SearchLoo
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (variable.equals("_value")) {
specialValue = new ReplaceableConstDoubleValueSource();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ grant {
permission org.opensearch.script.ClassPermission "java.lang.Math";
permission org.opensearch.script.ClassPermission "org.apache.lucene.util.MathUtil";
permission org.opensearch.script.ClassPermission "org.apache.lucene.util.SloppyMath";
permission org.opensearch.script.ClassPermission "org.apache.lucene.expressions.js.ExpressionMath";
};
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
Expand All @@ -52,10 +56,6 @@
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
Expand All @@ -47,9 +50,6 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.opensearch.common.Strings;
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.opensearch.common.xcontent.XContentBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.similarities.Similarity;
Expand Down Expand Up @@ -409,6 +410,11 @@ public static final class LateParsingQuery extends Query {
this.similarity = similarity;
}

/**
 * Reports this query to the given visitor as a leaf by calling
 * {@code visitor.visitLeaf(this)}; nothing else is handed to the visitor here.
 *
 * @param visitor the visitor that receives this query
 */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}

@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = super.rewrite(reader);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
Expand All @@ -56,7 +56,6 @@
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.Set;

final class PercolateQuery extends Query implements Accountable {

Expand Down Expand Up @@ -112,8 +111,6 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo
final Weight verifiedMatchesWeight = verifiedMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
final Weight candidateMatchesWeight = candidateMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
return new Weight(this) {
@Override
public void extractTerms(Set<Term> set) {}

@Override
public Explanation explain(LeafReaderContext leafReaderContext, int docId) throws IOException {
Expand Down Expand Up @@ -245,6 +242,11 @@ Query getVerifiedMatchesQuery() {
return verifiedMatchesQuery;
}

/**
 * Reports this query to the given visitor as a single leaf by calling
 * {@code visitor.visitLeaf(this)}; the queries held by this instance are not
 * passed to the visitor by this method.
 *
 * @param visitor the visitor that receives this query
 */
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}

// Comparing identity here to avoid being cached
// Note that in theory if the same instance gets used multiple times it could still get cached,
// however since we create a new query instance each time we use this query this shouldn't happen and thus
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.sandbox.search.CoveringQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchNoDocsQuery;
Expand Down Expand Up @@ -279,7 +279,7 @@ Query percolateQuery(
}
Query filter = null;
if (excludeNestedDocuments) {
filter = Queries.newNonNestedFilter(indexVersion);
filter = Queries.newNonNestedFilter();
}
return new PercolateQuery(name, queryStore, documents, candidateQuery, searcher, filter, verifiedMatchesQuery);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.opensearch.Version;
import org.opensearch.common.document.DocumentField;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.search.fetch.FetchContext;
Expand Down Expand Up @@ -127,7 +126,7 @@ static class PercolateContext {
this.percolateQuery = pq;
this.singlePercolateQuery = singlePercolateQuery;
IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher();
Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(Version.CURRENT));
Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter());
Weight weight = percolatorIndexSearcher.createWeight(nonNestedFilter, ScoreMode.COMPLETE_NO_SCORES, 1f);
Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0));
int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();
Expand All @@ -148,7 +147,7 @@ Query filterNestedDocs(Query in) {
if (rootDocsBySlot != null) {
// Ensures that we filter out nested documents
return new BooleanQuery.Builder().add(in, BooleanClause.Occur.MUST)
.add(Queries.newNonNestedFilter(Version.CURRENT), BooleanClause.Occur.FILTER)
.add(Queries.newNonNestedFilter(), BooleanClause.Occur.FILTER)
.build();
}
return in;
Expand Down
Loading