Skip to content

Commit

Permalink
Merge branch 'main' into fstpostingformat-off-heap
Browse files Browse the repository at this point in the history
  • Loading branch information
dungba88 authored Dec 28, 2023
2 parents cc249e1 + 948970b commit 78ff079
Show file tree
Hide file tree
Showing 102 changed files with 1,036 additions and 541 deletions.
10 changes: 8 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,21 @@ First of all, you need the Lucene source code.

Get the source code using: `git clone https://github.com/apache/lucene`

### Notes for required Java version
Please note that it is important to preserve the files' original line breaks - some of them have their checksums verified during build.
If you are using Windows you might want to override the default Git configuration when cloning the repository:
`git clone --config core.autocrlf=false https://github.com/apache/lucene`

### Pre-requisites

Be sure that you are using an appropriate version of the JDK. Please check [README](./README.md) for the required JDK version for current main branch.

Some build tasks (in particular `./gradlew check`) require Perl and Python 3.

### Building with Gradle

Lucene uses [Gradle](https://gradle.org/) for build control. Gradle is itself Java-based and may be incompatible with newer Java versions; you can still build and test Lucene with these Java releases, see [jvms.txt](./help/jvms.txt) for more information.

NOTE: DO NOT use the `gradle` command that is perhaps installed on your machine. This may result in using a different gradle version than the project requires and this is known to lead to very cryptic errors. The "gradle wrapper" (gradlew script) does everything required to build the project from scratch: it downloads the correct version of gradle, sets up sane local configurations and is tested on multiple environments.
NOTE: DO NOT use the `gradle` command that is perhaps installed on your machine. This may result in using a different gradle version than the project requires and this is known to lead to very cryptic errors. The "gradle wrapper" (`gradlew` script) does everything required to build the project from scratch: it downloads the correct version of gradle, sets up sane local configurations and is tested on multiple environments.

The first time you run gradlew, it will create a file "gradle.properties" that contains machine-specific settings. Normally you can use this file as-is, but it can be modified if necessary.

Expand Down
8 changes: 4 additions & 4 deletions dev-tools/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ as to the usefulness of the tools.

Description of dev-tools/ contents:

./size-estimator-lucene-solr.xls -- Spreadsheet for estimating memory and disk usage in Lucene/Solr
./aws-jmh/ -- Scripts for running microbenchmarks across different ec2 instance types.
./doap/ -- Lucene project descriptors in DOAP RDF format.
./missing-doclet -- JavaDoc validation doclet subproject
./doap/ -- Lucene and Solr project descriptors in DOAP RDF format.
./scripts/ -- Odds and ends for building releases, etc.
./test-patch/ -- Scripts for automatically validating patches
./scripts/ -- Odds and ends for building releases, etc.
./test-patch/ -- Scripts for automatically validating patches
7 changes: 7 additions & 0 deletions dev-tools/doap/lucene.rdf
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@
</maintainer>

<!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
<release>
<Version>
<name>lucene-9.9.1</name>
<created>2023-12-16</created>
<revision>9.9.1</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-9.9.0</name>
Expand Down
2 changes: 1 addition & 1 deletion gradle/help.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ configure(rootProject) {
help {
doLast {
println ""
println "This is an experimental Lucene/Solr gradle build. See some"
println "This is Lucene's gradle build. See some"
println "guidelines, ant-equivalent commands etc. under help/*; or type:"
println ""
helpFiles.each { section, path, sectionInfo ->
Expand Down
1 change: 0 additions & 1 deletion gradle/testing/randomization.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ allprojects {
"tests.leaveTemporary",
"tests.leavetemporary",
"tests.leavetmpdir",
"solr.test.leavetmpdir",
].find { prop ->
def v = Boolean.parseBoolean(propertyOrDefault(prop, "false"))
if (v) {
Expand Down
8 changes: 0 additions & 8 deletions gradle/testing/randomization/policies/tests.policy
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ grant {
permission java.lang.RuntimePermission "getFileStoreAttributes";
permission java.lang.RuntimePermission "writeFileDescriptor";

// needed to check if C2 (implied by the presence of the CI env) is enabled
permission java.lang.RuntimePermission "getenv.CI";

// TestLockFactoriesMultiJVM opens a random port on 127.0.0.1 (port 0 = ephemeral port range):
permission java.net.SocketPermission "127.0.0.1:0", "accept,listen,resolve";

Expand All @@ -80,11 +77,6 @@ grant {
// used by nested tests? (e.g. TestLeaveFilesIfTestFails). TODO: look into this
permission java.util.PropertyPermission "tests.runnested", "write";

// solr properties. TODO: move these out to SolrTestCase
permission java.util.PropertyPermission "solr.data.dir", "write";
permission java.util.PropertyPermission "solr.solr.home", "write";
permission java.util.PropertyPermission "solr.directoryFactory", "write";

// allows LuceneTestCase#runWithRestrictedPermissions to execute with lower (or no) permission
permission java.security.SecurityPermission "createAccessControlContext";

Expand Down
2 changes: 1 addition & 1 deletion help/formatting.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ IMPORTANT: There is *no* way to mark sections of the code as excluded
from formatting. This is by design and cannot be altered. In the vast
majority of cases the formatter will do a great job of cleaning up the
code. Occasionally you may want to rewrite the code (introduce a local
variable orreshape code paths) so that it's easier to read after
variable or reshape code paths) so that it's easier to read after
automatic formatting.
11 changes: 10 additions & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ API Changes
* GITHUB#12980: Make FSTPostingsFormat build its FST off-heap. This PostingsFormat will now
create 2 FST files (tfp.meta and tfp.data) instead of a single one. (Anh Dung Bui)

* GITHUB#12875: Ensure token position is always increased in PathHierarchyTokenizer and ReversePathHierarchyTokenizer
and resulting tokens do not overlap. (Michael Froh, Lukáš Vlček)

New Features
---------------------

Expand Down Expand Up @@ -120,6 +123,8 @@ Optimizations

* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)

* GITHUB#12841: Move group-varint encoding/decoding logic to DataOutput/DataInput. (Adrien Grand, Zhang Chao, Uwe Schindler)

Bug Fixes
---------------------

Expand Down Expand Up @@ -191,7 +196,9 @@ Improvements

Optimizations
---------------------
(No changes)

* GITHUB#12839: Introduce method to grow arrays up to a given upper limit and use it to reduce overallocation for
DirectoryTaxonomyReader#getBulkOrdinals. (Stefan Vodita)

Bug Fixes
---------------------
Expand All @@ -216,6 +223,8 @@ Other

* GITHUB#11023: Removing @lucene.experimental tags in testXXX methods in CheckIndex. (Jakub Slowinski)

* GITHUB#12934: Cleaning up old references to Lucene/Solr. (Jakub Slowinski)

======================== Lucene 9.9.1 =======================

Bug Fixes
Expand Down
5 changes: 5 additions & 0 deletions lucene/MIGRATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,11 @@ It now declares that it may throw `IOException`. This was an oversight because
compiled expressions call `DoubleValues#doubleValue` behind the scenes, which
may throw `IOException` on index problems, bubbling up unexpectedly to the caller.

### PathHierarchyTokenizer and ReversePathHierarchyTokenizer do not produce overlapping tokens

`(Reverse)PathHierarchyTokenizer` now produces sequential (instead of overlapping) tokens with accurate
offsets, making positional queries and highlighters possible for fields tokenized with this tokenizer.

## Migration from Lucene 9.0 to Lucene 9.1

### Test framework package migration and module (LUCENE-10301)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ public PathHierarchyTokenizer(

private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
private final PositionIncrementAttribute posIncAtt =
addAttribute(PositionIncrementAttribute.class);
private int startPosition = 0;
private int skipped = 0;
private boolean endDelimiter = false;
Expand All @@ -112,11 +113,7 @@ public PathHierarchyTokenizer(
public final boolean incrementToken() throws IOException {
clearAttributes();
termAtt.append(resultToken);
if (resultToken.length() == 0) {
posAtt.setPositionIncrement(1);
} else {
posAtt.setPositionIncrement(0);
}
posIncAtt.setPositionIncrement(1);
int length = 0;
boolean added = false;
if (endDelimiter) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ public ReversePathHierarchyTokenizer(

private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
private final PositionIncrementAttribute posIncAtt =
addAttribute(PositionIncrementAttribute.class);

private int endPosition = 0;
private int finalOffset = 0;
Expand Down Expand Up @@ -158,10 +159,8 @@ public final boolean incrementToken() throws IOException {
endPosition = delimiterPositions.get(idx);
}
finalOffset = correctOffset(length);
posAtt.setPositionIncrement(1);
} else {
posAtt.setPositionIncrement(0);
}
posIncAtt.setPositionIncrement(1);

while (skipped < delimitersCount - skip - 1) {
int start = delimiterPositions.get(skipped);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import static org.apache.lucene.analysis.path.PathHierarchyTokenizer.DEFAULT_DELIMITER;
import static org.apache.lucene.analysis.path.PathHierarchyTokenizer.DEFAULT_SKIP;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Random;
Expand All @@ -41,7 +42,7 @@ public void testBasic() throws Exception {
new String[] {"/a", "/a/b", "/a/b/c"},
new int[] {0, 0, 0},
new int[] {2, 4, 6},
new int[] {1, 0, 0},
new int[] {1, 1, 1},
path.length());
}

Expand All @@ -56,7 +57,7 @@ public void testEndOfDelimiter() throws Exception {
new String[] {"/a", "/a/b", "/a/b/c", "/a/b/c/"},
new int[] {0, 0, 0, 0},
new int[] {2, 4, 6, 7},
new int[] {1, 0, 0, 0},
new int[] {1, 1, 1, 1},
path.length());
}

Expand All @@ -71,7 +72,7 @@ public void testStartOfChar() throws Exception {
new String[] {"a", "a/b", "a/b/c"},
new int[] {0, 0, 0},
new int[] {1, 3, 5},
new int[] {1, 0, 0},
new int[] {1, 1, 1},
path.length());
}

Expand All @@ -86,7 +87,7 @@ public void testStartOfCharEndOfDelimiter() throws Exception {
new String[] {"a", "a/b", "a/b/c", "a/b/c/"},
new int[] {0, 0, 0, 0},
new int[] {1, 3, 5, 6},
new int[] {1, 0, 0, 0},
new int[] {1, 1, 1, 1},
path.length());
}

Expand All @@ -111,7 +112,7 @@ public void testOnlyDelimiters() throws Exception {
new String[] {"/", "//"},
new int[] {0, 0},
new int[] {1, 2},
new int[] {1, 0},
new int[] {1, 1},
path.length());
}

Expand All @@ -125,7 +126,7 @@ public void testReplace() throws Exception {
new String[] {"\\a", "\\a\\b", "\\a\\b\\c"},
new int[] {0, 0, 0},
new int[] {2, 4, 6},
new int[] {1, 0, 0},
new int[] {1, 1, 1},
path.length());
}

Expand All @@ -139,7 +140,7 @@ public void testWindowsPath() throws Exception {
new String[] {"c:", "c:\\a", "c:\\a\\b", "c:\\a\\b\\c"},
new int[] {0, 0, 0, 0},
new int[] {2, 4, 6, 8},
new int[] {1, 0, 0, 0},
new int[] {1, 1, 1, 1},
path.length());
}

Expand All @@ -158,7 +159,7 @@ public void testNormalizeWinDelimToLinuxDelim() throws Exception {
new String[] {"c:", "c:/a", "c:/a/b", "c:/a/b/c"},
new int[] {0, 0, 0, 0},
new int[] {2, 4, 6, 8},
new int[] {1, 0, 0, 0},
new int[] {1, 1, 1, 1},
path.length());
}

Expand All @@ -172,7 +173,7 @@ public void testBasicSkip() throws Exception {
new String[] {"/b", "/b/c"},
new int[] {2, 2},
new int[] {4, 6},
new int[] {1, 0},
new int[] {1, 1},
path.length());
}

Expand All @@ -186,7 +187,7 @@ public void testEndOfDelimiterSkip() throws Exception {
new String[] {"/b", "/b/c", "/b/c/"},
new int[] {2, 2, 2},
new int[] {4, 6, 7},
new int[] {1, 0, 0},
new int[] {1, 1, 1},
path.length());
}

Expand All @@ -200,7 +201,7 @@ public void testStartOfCharSkip() throws Exception {
new String[] {"/b", "/b/c"},
new int[] {1, 1},
new int[] {3, 5},
new int[] {1, 0},
new int[] {1, 1},
path.length());
}

Expand All @@ -214,7 +215,7 @@ public void testStartOfCharEndOfDelimiterSkip() throws Exception {
new String[] {"/b", "/b/c", "/b/c/"},
new int[] {1, 1, 1},
new int[] {3, 5, 6},
new int[] {1, 0, 0},
new int[] {1, 1, 1},
path.length());
}

Expand Down Expand Up @@ -270,4 +271,20 @@ protected TokenStreamComponents createComponents(String fieldName) {
checkRandomData(random, a, 100 * RANDOM_MULTIPLIER, 1027, false, false);
a.close();
}

/**
 * Analyzer whose token stream consists solely of a {@link PathHierarchyTokenizer} created with
 * its no-argument constructor; the field name is ignored.
 */
private final Analyzer analyzer =
    new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // No filters are chained: the tokenizer is both the source and the sink of the stream.
        return new TokenStreamComponents(new PathHierarchyTokenizer());
      }
    };

/**
 * Runs the tokenizer through the {@code Analyzer} API and checks the emitted terms for paths
 * with and without a leading and/or trailing delimiter.
 */
public void testTokenizerViaAnalyzerOutput() throws IOException {
  // Neither leading nor trailing delimiter.
  String[] relativeTerms = {"a", "a/b", "a/b/c"};
  assertAnalyzesTo(analyzer, "a/b/c", relativeTerms);

  // Trailing delimiter only: an extra term that includes the final delimiter is expected.
  String[] relativeTermsWithTail = {"a", "a/b", "a/b/c", "a/b/c/"};
  assertAnalyzesTo(analyzer, "a/b/c/", relativeTermsWithTail);

  // Leading delimiter only: every term keeps the leading delimiter.
  String[] absoluteTerms = {"/a", "/a/b", "/a/b/c"};
  assertAnalyzesTo(analyzer, "/a/b/c", absoluteTerms);

  // Both leading and trailing delimiters.
  String[] absoluteTermsWithTail = {"/a", "/a/b", "/a/b/c", "/a/b/c/"};
  assertAnalyzesTo(analyzer, "/a/b/c/", absoluteTermsWithTail);
}
}
Loading

0 comments on commit 78ff079

Please sign in to comment.