From 0a67be867420900d9eeec223f5e498b2f0ab3418 Mon Sep 17 00:00:00 2001 From: easyice <80152403@qq.com> Date: Sat, 16 Dec 2023 23:47:15 +0800 Subject: [PATCH 1/3] init --- .../codecs/simpletext/SimpleTextDocValuesReader.java | 6 +++--- .../lucene/codecs/simpletext/SimpleTextFieldsReader.java | 4 ++-- .../codecs/simpletext/SimpleTextTermVectorsReader.java | 2 +- .../java/org/apache/lucene/codecs/TermVectorsWriter.java | 2 +- .../src/java/org/apache/lucene/index/FreqProxFields.java | 2 +- .../src/java/org/apache/lucene/util/ByteBlockPool.java | 2 +- .../src/java/org/apache/lucene/util/BytesRefArray.java | 2 +- .../src/java/org/apache/lucene/util/BytesRefBuilder.java | 7 +++++++ .../src/java/org/apache/lucene/util/OfflineSorter.java | 2 +- lucene/core/src/java/org/apache/lucene/util/fst/Util.java | 2 +- .../search/suggest/analyzing/AnalyzingSuggester.java | 4 ++-- .../search/suggest/document/CompletionFieldsConsumer.java | 2 +- .../lucene/search/suggest/fst/FSTCompletionBuilder.java | 2 +- .../lucene/search/suggest/fst/FSTCompletionLookup.java | 2 +- 14 files changed, 24 insertions(+), 17 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java index 6c36e277ad15..306f65bca8ac 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java @@ -329,7 +329,7 @@ public BytesRef apply(int docID) { } catch (ParseException pe) { throw new CorruptIndexException("failed to parse int length", in, pe); } - term.grow(len); + term.growNoCopy(len); term.setLength(len); in.readBytes(term.bytes(), 0, len); return term.get(); @@ -569,7 +569,7 @@ public BytesRef lookupOrd(int ord) throws IOException { } catch (ParseException pe) { throw new CorruptIndexException("failed to parse int length", in, pe); } - 
term.grow(len); + term.growNoCopy(len); term.setLength(len); in.readBytes(term.bytes(), 0, len); return term.get(); @@ -756,7 +756,7 @@ public BytesRef lookupOrd(long ord) throws IOException { } catch (ParseException pe) { throw new CorruptIndexException("failed to parse int length", in, pe); } - term.grow(len); + term.growNoCopy(len); term.setLength(len); in.readBytes(term.bytes(), 0, len); return term.get(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index abfbdd25a02d..e8fbadfe7c5d 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -600,7 +600,7 @@ public int nextPosition() throws IOException { SimpleTextUtil.readLine(in, scratch); if (StringHelper.startsWith(scratch.get(), PAYLOAD)) { final int len = scratch.length() - PAYLOAD.length; - scratch2.grow(len); + scratch2.growNoCopy(len); System.arraycopy(scratch.bytes(), PAYLOAD.length, scratch2.bytes(), 0, len); scratch2.setLength(len); payload = scratch2.get(); @@ -727,7 +727,7 @@ private void loadTerms() throws IOException { } lastDocsStart = in.getFilePointer(); final int len = scratch.length() - TERM.length; - lastTerm.grow(len); + lastTerm.growNoCopy(len); System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len); lastTerm.setLength(len); docFreq = 0; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index a107096d4bba..27656ea08cc2 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -150,7 +150,7 @@ public 
Fields get(int doc) throws IOException { readLine(); assert StringHelper.startsWith(scratch.get(), TERMTEXT); int termLength = scratch.length() - TERMTEXT.length; - term.grow(termLength); + term.growNoCopy(termLength); term.setLength(termLength); System.arraycopy(scratch.bytes(), TERMTEXT.length, term.bytes(), 0, termLength); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java index f6d672aa41b3..45dcc504186e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java @@ -143,7 +143,7 @@ public void addProx(int numProx, DataInput positions, DataInput offsets) throws if (payload == null) { payload = new BytesRefBuilder(); } - payload.grow(payloadLength); + payload.growNoCopy(payloadLength); positions.readBytes(payload.bytes(), 0, payloadLength); payload.setLength(payloadLength); diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java index bbac4231dcd9..1042ecbeb479 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java +++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java @@ -505,7 +505,7 @@ public int nextPosition() throws IOException { hasPayload = true; // has a payload payload.setLength(posReader.readVInt()); - payload.grow(payload.length()); + payload.growNoCopy(payload.length()); posReader.readBytes(payload.bytes(), 0, payload.length()); } else { hasPayload = false; diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index c7e0997cac08..986118acdedf 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -216,7 +216,7 @@ void setBytesRef(BytesRefBuilder 
builder, BytesRef result, long offset, int leng result.offset = pos; } else { // Uncommon case: The slice spans at least 2 blocks, so we must copy the bytes. - builder.grow(length); + builder.growNoCopy(length); result.bytes = builder.get().bytes; result.offset = 0; readBytes(offset, result.bytes, 0, length); diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefArray.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefArray.java index 6f69843586b6..dd8473a408b5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefArray.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefArray.java @@ -94,7 +94,7 @@ public BytesRef get(BytesRefBuilder spare, int index) { Objects.checkIndex(index, lastElement); int offset = offsets[index]; int length = index == lastElement - 1 ? currentOffset - offset : offsets[index + 1] - offset; - spare.grow(length); + spare.growNoCopy(length); spare.setLength(length); pool.readBytes(offset, spare.bytes(), 0, spare.length()); return spare.get(); diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefBuilder.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefBuilder.java index 97c1fac2032f..d6f67061e19c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefBuilder.java @@ -60,6 +60,13 @@ public void grow(int capacity) { ref.bytes = ArrayUtil.grow(ref.bytes, capacity); } + /** + * Used to grow the builder without coping bytes. see {@link ArrayUtil#growNoCopy(byte[], int)}. + */ + public void growNoCopy(int capacity) { + ref.bytes = ArrayUtil.growNoCopy(ref.bytes, capacity); + } + /** Append a single byte to this builder. 
*/ public void append(byte b) { grow(ref.length + 1); diff --git a/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java index 21fbf76d045a..f8784c88702f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/OfflineSorter.java @@ -593,7 +593,7 @@ public BytesRef next() throws IOException { } short length = in.readShort(); - ref.grow(length); + ref.growNoCopy(length); ref.setLength(length); in.readBytes(ref.bytes(), 0, length); return ref.get(); diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java index 9fdc460d0583..2609e08f31fc 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java @@ -794,7 +794,7 @@ public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) { /** Just converts IntsRef to BytesRef; you must ensure the int values fit into a byte. 
*/ public static BytesRef toBytesRef(IntsRef input, BytesRefBuilder scratch) { - scratch.grow(input.length); + scratch.growNoCopy(input.length); for (int i = 0; i < input.length; i++) { int value = input.ints[i + input.offset]; // NOTE: we allow -128 to 255 diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index fc585441d61b..b31330035bdb 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -452,7 +452,7 @@ public void build(InputIterator iterator) throws IOException { payload = null; } - buffer = ArrayUtil.grow(buffer, requiredLength); + buffer = ArrayUtil.growNoCopy(buffer, requiredLength); output.reset(buffer); @@ -522,7 +522,7 @@ public void build(InputIterator iterator) throws IOException { } input.reset(bytes.bytes, bytes.offset, bytes.length); short analyzedLength = input.readShort(); - analyzed.grow(analyzedLength + 2); + analyzed.growNoCopy(analyzedLength + 2); input.readBytes(analyzed.bytes(), 0, analyzedLength); analyzed.setLength(analyzedLength); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java index c1db7eb62e1f..8c6a0856f05b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsConsumer.java @@ -223,7 +223,7 @@ public void write(BytesRef term, TermsEnum termsEnum) throws IOException { ByteArrayDataInput input = new ByteArrayDataInput(payload.bytes, payload.offset, payload.length); int len = input.readVInt(); - scratch.grow(len); + scratch.growNoCopy(len); 
scratch.setLength(len); input.readBytes(scratch.bytes(), 0, scratch.length()); long weight = input.readVInt() - 1; diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java index 232ce8ed3ba0..29949f845a06 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java @@ -176,7 +176,7 @@ public void add(BytesRef utf8, int bucket) throws IOException { "Bucket outside of the allowed range [0, " + buckets + "): " + bucket); } - scratch.grow(utf8.length + 10); + scratch.growNoCopy(utf8.length + 10); scratch.clear(); scratch.append((byte) bucket); scratch.append(utf8); diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 44e139800545..d398fdcf318a 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -180,7 +180,7 @@ public void build(InputIterator iterator) throws IOException { int inputLineCount = 0; while ((spare = iterator.next()) != null) { if (spare.length + 4 >= buffer.length) { - buffer = ArrayUtil.grow(buffer, spare.length + 4); + buffer = ArrayUtil.growNoCopy(buffer, spare.length + 4); } output.reset(buffer); From 612d9f1e0f6304dbfe3b158246c2d082749e793d Mon Sep 17 00:00:00 2001 From: easyice <80152403@qq.com> Date: Sun, 17 Dec 2023 08:55:56 +0800 Subject: [PATCH 2/3] fix typo --- .../core/src/java/org/apache/lucene/util/BytesRefBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefBuilder.java 
b/lucene/core/src/java/org/apache/lucene/util/BytesRefBuilder.java index d6f67061e19c..21adf0c60da7 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefBuilder.java @@ -61,7 +61,7 @@ public void grow(int capacity) { } /** - * Used to grow the builder without coping bytes. see {@link ArrayUtil#growNoCopy(byte[], int)}. + * Used to grow the builder without copying bytes. See {@link ArrayUtil#growNoCopy(byte[], int)}. */ public void growNoCopy(int capacity) { ref.bytes = ArrayUtil.growNoCopy(ref.bytes, capacity); From 3b7adbaa8a7badad205dcd4eca61c21e92eba89a Mon Sep 17 00:00:00 2001 From: easyice <80152403@qq.com> Date: Fri, 19 Jan 2024 15:53:20 +0800 Subject: [PATCH 3/3] support growNoCopy in IntsRefBuilder --- .../analysis/ja/dict/TokenInfoDictionaryBuilder.java | 2 +- .../apache/lucene/analysis/ja/dict/UserDictionary.java | 2 +- .../analysis/ko/dict/TokenInfoDictionaryBuilder.java | 2 +- .../apache/lucene/analysis/ko/dict/UserDictionary.java | 2 +- .../src/java/org/apache/lucene/util/IntsRefBuilder.java | 9 +++++++-- .../core/src/java/org/apache/lucene/util/fst/Util.java | 2 +- .../java/org/apache/lucene/tests/util/fst/FSTTester.java | 2 +- 7 files changed, 13 insertions(+), 8 deletions(-) diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java index 8afddb9ca961..7169097cdd43 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionaryBuilder.java @@ -120,7 +120,7 @@ private TokenInfoDictionaryWriter buildDictionary(List csvFiles) throws IO // new word to add to fst ord++; lastValue = token; - scratch.grow(token.length()); + scratch.growNoCopy(token.length()); 
scratch.setLength(token.length()); for (int i = 0; i < token.length(); i++) { scratch.setIntAt(i, (int) token.charAt(i)); diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java index 42807eed2784..beb439cde862 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java @@ -138,7 +138,7 @@ public int compare(String[] left, String[] right) { } // add mapping to FST String token = values[0]; - scratch.grow(token.length()); + scratch.growNoCopy(token.length()); scratch.setLength(token.length()); for (int i = 0; i < token.length(); i++) { scratch.setIntAt(i, (int) token.charAt(i)); diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java index f66abba8b3ad..2e7996d8b75a 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionaryBuilder.java @@ -116,7 +116,7 @@ private TokenInfoDictionaryWriter buildDictionary(List csvFiles) throws IO // new word to add to fst ord++; lastValue = surfaceForm; - scratch.grow(surfaceForm.length()); + scratch.growNoCopy(surfaceForm.length()); scratch.setLength(surfaceForm.length()); for (int i = 0; i < surfaceForm.length(); i++) { scratch.setIntAt(i, surfaceForm.charAt(i)); diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java index 4632edc8a211..6219be1e8139 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java 
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java @@ -121,7 +121,7 @@ private UserDictionary(List entries) throws IOException { } // add mapping to FST - scratch.grow(token.length()); + scratch.growNoCopy(token.length()); scratch.setLength(token.length()); for (int i = 0; i < token.length(); i++) { scratch.setIntAt(i, token.charAt(i)); diff --git a/lucene/core/src/java/org/apache/lucene/util/IntsRefBuilder.java b/lucene/core/src/java/org/apache/lucene/util/IntsRefBuilder.java index c1f7078da231..7ab208c3ba1c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntsRefBuilder.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntsRefBuilder.java @@ -77,9 +77,14 @@ public void grow(int newLength) { ref.ints = ArrayUtil.grow(ref.ints, newLength); } + /** Grow the reference array without copying the original data to the new array. */ + public void growNoCopy(int newLength) { + ref.ints = ArrayUtil.growNoCopy(ref.ints, newLength); + } + /** Copies the given array into this instance. */ public void copyInts(int[] otherInts, int otherOffset, int otherLength) { - grow(otherLength); + growNoCopy(otherLength); System.arraycopy(otherInts, otherOffset, ref.ints, 0, otherLength); ref.length = otherLength; } @@ -94,7 +99,7 @@ public void copyInts(IntsRef ints) { * UTF-8 to UTF-32 and then copied into this builder. 
*/ public void copyUTF8Bytes(BytesRef bytes) { - grow(bytes.length); + growNoCopy(bytes.length); ref.length = UnicodeUtil.UTF8toUTF32(bytes, ref.ints); } diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java index 2609e08f31fc..5800b22028db 100644 --- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java +++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java @@ -738,7 +738,7 @@ private static String printableLabel(int label) { public static IntsRef toUTF16(CharSequence s, IntsRefBuilder scratch) { final int charLimit = s.length(); scratch.setLength(charLimit); - scratch.grow(charLimit); + scratch.growNoCopy(charLimit); for (int idx = 0; idx < charLimit; idx++) { scratch.setIntAt(idx, s.charAt(idx)); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/fst/FSTTester.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/fst/FSTTester.java index 07f877575b00..1564b2a7e88a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/fst/FSTTester.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/fst/FSTTester.java @@ -155,7 +155,7 @@ static IntsRef toIntsRefUTF32(String s, IntsRefBuilder ir) { } static IntsRef toIntsRef(BytesRef br, IntsRefBuilder ir) { - ir.grow(br.length); + ir.growNoCopy(br.length); ir.clear(); for (int i = 0; i < br.length; i++) { ir.append(br.bytes[br.offset + i] & 0xFF);