From 4feaf3bdd3d24337c67f2b697de786a4c9ccfc77 Mon Sep 17 00:00:00 2001 From: Dmitry Cherniachenko <2sabio@gmail.com> Date: Thu, 15 Feb 2024 18:16:44 +0100 Subject: [PATCH] Move `brToString(BytesRef)` to `ToStringUtils` (#13068) --- lucene/CHANGES.txt | 3 +- .../lucene40/blocktree/FieldReader.java | 2 +- .../blocktree/IntersectTermsEnum.java | 13 --- .../Lucene40BlockTreeTermsReader.java | 18 ----- .../lucene40/blocktree/SegmentTermsEnum.java | 72 +++++++---------- .../blocktree/SegmentTermsEnumFrame.java | 44 ++++------- .../Lucene40BlockTreeTermsWriter.java | 63 ++++++--------- .../OrdsBlockTreeTermsReader.java | 18 ----- .../OrdsBlockTreeTermsWriter.java | 54 +++++-------- .../blocktreeords/OrdsIntersectTermsEnum.java | 39 +++++---- .../blocktreeords/OrdsSegmentTermsEnum.java | 75 ++++++++---------- .../OrdsSegmentTermsEnumFrame.java | 41 +++++----- .../lucene90/blocktree/FieldReader.java | 2 +- .../blocktree/IntersectTermsEnum.java | 13 --- .../Lucene90BlockTreeTermsReader.java | 18 ----- .../Lucene90BlockTreeTermsWriter.java | 63 ++++++--------- .../lucene90/blocktree/SegmentTermsEnum.java | 74 +++++++---------- .../blocktree/SegmentTermsEnumFrame.java | 44 ++++------- .../java/org/apache/lucene/util/BytesRef.java | 9 ++- .../org/apache/lucene/util/ToStringUtils.java | 31 ++++++++ .../idversion/IDVersionSegmentTermsEnum.java | 79 +++++++++---------- .../IDVersionSegmentTermsEnumFrame.java | 33 ++++---- .../VersionBlockTreeTermsReader.java | 18 ----- .../VersionBlockTreeTermsWriter.java | 45 ++++------- 24 files changed, 341 insertions(+), 530 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index be199050a660..9f60ae369078 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -30,7 +30,8 @@ Bug Fixes Other --------------------- -(No changes) + +* GITHUB#13068: Replace numerous `brToString(BytesRef)` copies with a `ToStringUtils` method (Dmitry Cherniachenko) ======================== Lucene 9.10.0 ======================= diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/FieldReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/FieldReader.java index 3b3ea3f4806c..a05be8854b27 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/FieldReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/FieldReader.java @@ -191,7 +191,7 @@ public int getDocCount() { @Override public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { // if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" + - // BlockTreeTermsWriter.brToString(startTerm)); + // ToStringUtils.bytesRefToString(startTerm)); // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton); // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum? // can we optimize knowing that...? diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java index 12a40d7a4d50..ec6f10cdcf2f 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/IntersectTermsEnum.java @@ -543,19 +543,6 @@ private BytesRef _next() throws IOException { } } - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - private void copyTerm() { final int len = currentFrame.prefix + currentFrame.suffix; if (term.bytes.length < len) { diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java index 6ea2533574c4..7a0965485021 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java @@ -354,24 +354,6 @@ public int size() { return fieldMap.size(); } - // for debugging - String brToString(BytesRef b) { - if (b == null) { - return "null"; - } else { - try { - return b.utf8ToString() + " " + b; - } catch ( - @SuppressWarnings("unused") - Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - } - @Override public void checkIntegrity() throws IOException { // terms index diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnum.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnum.java index 52c0e3391286..224906fa5a01 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnum.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnum.java @@ -256,8 +256,8 @@ SegmentTermsEnumFrame pushFrame(FST.Arc arc, long fp, int length) thro final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); f.arc = arc; if (f.fpOrig == fp && f.nextEnt != -1) { - // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " - // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + + // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + + // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // term.length + " vs prefix=" + f.prefix); // if (f.prefix > targetBeforeCurrentLength) { @@ -279,7 +279,7 @@ SegmentTermsEnumFrame pushFrame(FST.Arc arc, long fp, int length) thro // final int sav = term.length; // term.length = length; // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + - // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term)); + // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term)); // term.length = sav; // } } @@ -299,27 +299,6 @@ private boolean setEOF() { return true; } - /* - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRefBuilder b) { - return brToString(b.get()); - } - */ - @Override public boolean seekExact(BytesRef target) throws IOException { @@ -337,8 +316,9 @@ public boolean seekExact(BytesRef target) throws IOException { // if (DEBUG) { // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + - // fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" - // + termExists + ") validIndexPrefix=" + validIndexPrefix); + // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" + + // ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists + + // ") validIndexPrefix=" + validIndexPrefix); // printSeekState(System.out); // } @@ -496,8 +476,8 @@ public boolean seekExact(BytesRef target) throws IOException { } // if (DEBUG) { - // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " - // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + + // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength); // } @@ -528,7 +508,7 @@ public boolean seekExact(BytesRef target) throws IOException { term.setByteAt(targetUpto, (byte) targetLabel); term.setLength(1 + targetUpto); // if (DEBUG) { - // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term)); // } return false; } @@ -544,7 +524,7 @@ public boolean seekExact(BytesRef target) throws IOException { } else { // if (DEBUG) { // System.out.println(" got " + result + "; return NOT_FOUND term=" + - // brToString(term)); + // ToStringUtils.bytesRefToString(term)); // } return false; } @@ -587,7 +567,7 @@ public boolean seekExact(BytesRef target) throws IOException { termExists = false; term.setLength(targetUpto); // if (DEBUG) { - // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term)); // } return false; } @@ -623,7 +603,8 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { // if (DEBUG) { // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + - // fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term) + // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + + // " current=" + ToStringUtils.bytesRefToString(term) // + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix); // printSeekState(System.out); // } @@ -667,9 +648,9 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); // if (DEBUG) { // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + - // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " - // vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + - // " output=" + output); + // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + + // " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + // + " output=" + output); // } if (cmp != 0) { break; @@ -781,8 +762,8 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { } // if (DEBUG) { - // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " - // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + + // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength); // } @@ -818,7 +799,8 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { if (next() != null) { // if (DEBUG) { - // System.out.println(" return NOT_FOUND term=" + brToString(term)); + // System.out.println(" return NOT_FOUND term=" + + // ToStringUtils.bytesRefToString(term)); // } return SeekStatus.NOT_FOUND; } else { @@ -829,7 +811,8 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { } } else { // if (DEBUG) { - // System.out.println(" return " + result + " term=" + brToString(term)); + // System.out.println(" return " + result + " term=" + + // ToStringUtils.bytesRefToString(term)); // } return result; } @@ -1029,9 +1012,10 @@ public BytesRef next() throws IOException { assert !eof; // if (DEBUG) { - // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + " - // termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" + - // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); + // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + + // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" + + // fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + + // " validIndexPrefix=" + validIndexPrefix); // printSeekState(System.out); // } @@ -1095,8 +1079,8 @@ public BytesRef next() throws IOException { // try to scan to the right floor frame: currentFrame.loadBlock(); } else { - // if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord=" - // + currentFrame.ord); + // if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) + + // " currentFrame.ord=" + currentFrame.ord); return term.get(); } } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnumFrame.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnumFrame.java index afe902fa3a8f..3003151bbe72 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnumFrame.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/SegmentTermsEnumFrame.java @@ -317,8 +317,8 @@ public boolean next() throws IOException { } public void nextLeaf() { - // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " - // entCount=" + entCount); + // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + + // " entCount=" + entCount); assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; nextEnt++; @@ -410,8 +410,8 @@ public void scanToFloorFrame(BytesRef target) { newFP = fpOrig + (code >>> 1); hasTerms = (code & 1) != 0; // if (DEBUG) { - // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + " - // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); + // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + + // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // } isLastInFloor = numFollowFloorBlocks == 1; @@ -566,28 +566,14 @@ public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOExcept private long subCode; CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION; - // for debugging - /* - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - */ - // Target's prefix matches this block's prefix; we // scan the entries check if the suffix matches. public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { - // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " - // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + - // brToString(term)); + // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + + // ToStringUtils.bytesRefToString(target) + + // " term=" + ToStringUtils.bytesRefToString(term)); assert nextEnt != -1; @@ -617,7 +603,7 @@ public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOEx // suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.length = suffix; // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" - // + brToString(suffixBytesRef)); + // + ToStringUtils.bytesRefToString(suffixBytesRef)); // } startBytePos = suffixesReader.getPosition(); @@ -682,8 +668,9 @@ public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOEx public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException { // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + - // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + - // brToString(target)); + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + + // ToStringUtils.bytesRefToString(target) + + // " term=" + ToStringUtils.bytesRefToString(term)); assert nextEnt != -1; @@ -711,7 +698,8 @@ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws I // suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.length = suffix; // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + - // (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); + // (nextEnt-1) + " (of " + entCount + ") suffix=" + + // ToStringUtils.bytesRefToString(suffixBytesRef)); // } final int termLen = prefix + suffix; @@ -743,8 +731,8 @@ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws I // return NOT_FOUND: fillTerm(); - // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " - // ste.termExists=" + ste.termExists); + // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + + // " ste.termExists=" + ste.termExists); if (!exactOnly && !ste.termExists) { // System.out.println(" now pushFrame"); diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java index 2ededb9391fd..6bcc0671a5c3 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene40/blocktree/Lucene40BlockTreeTermsWriter.java @@ -46,6 +46,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.compress.LZ4; import org.apache.lucene.util.compress.LowercaseAsciiCompression; import org.apache.lucene.util.fst.ByteSequenceOutputs; @@ -349,7 +350,7 @@ public void write(Fields fields, NormsProducer norms) throws IOException { } // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + - // brToString(term)); + // ToStringUtils.bytesRefToString(term)); termsWriter.write(term, termsEnum, norms); } @@ -388,33 +389,10 @@ public PendingTerm(BytesRef term, BlockTermState state) { @Override public String toString() { - return "TERM: " + brToString(termBytes); + return "TERM: " + ToStringUtils.bytesRefToString(termBytes); } } - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - if (b == null) { - return "(null)"; - } else { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - } - - // for debugging - @SuppressWarnings("unused") - static String brToString(byte[] b) { - return brToString(new BytesRef(b)); - } - private static final class PendingBlock extends PendingEntry { public final BytesRef prefix; public final long fp; @@ -442,7 +420,7 @@ public PendingBlock( @Override public String toString() { - return "BLOCK: prefix=" + brToString(prefix); + return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix); } public void compileIndex( @@ -600,8 +578,8 @@ void writeBlocks(int prefixLength, int count) throws IOException { // if (DEBUG2) { // BytesRef br = new BytesRef(lastTerm.bytes()); // br.length = prefixLength; - // System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" - // + count); + // System.out.println("writeBlocks: seg=" + segment + " prefix=" + + // ToStringUtils.bytesRefToString(br) + " count=" + count); // } // Root block better write all remaining pending entries: @@ -754,9 +732,10 @@ private PendingBlock writeBlock( prefix.length = prefixLength; // if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + - // brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == - // pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + - // " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks); + // ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + + // " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel + + // " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + + // hasSubBlocks); // Write block header: int numEntries = end - start; @@ -769,7 +748,9 @@ private PendingBlock writeBlock( /* if (DEBUG) { - System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : "")); + System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) + + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : "")); } */ @@ -804,7 +785,8 @@ private PendingBlock writeBlock( // BytesRef suffixBytes = new BytesRef(suffix); // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); // suffixBytes.length = suffix; - // System.out.println(" write term suffix=" + brToString(suffixBytes)); + // System.out.println(" write term suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes)); // } // For leaf block we write suffix straight @@ -837,7 +819,8 @@ private PendingBlock writeBlock( // BytesRef suffixBytes = new BytesRef(suffix); // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); // suffixBytes.length = suffix; - // System.out.println(" write term suffix=" + brToString(suffixBytes)); + // System.out.println(" write term suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes)); // } // For non-leaf block we borrow 1 bit to record @@ -879,8 +862,9 @@ private PendingBlock writeBlock( // BytesRef suffixBytes = new BytesRef(suffix); // System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); // suffixBytes.length = suffix; - // System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " - // subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); + // System.out.println(" write sub-block suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes) + + // " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); // } assert floorLeadLabel == -1 @@ -998,7 +982,8 @@ public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throw if (DEBUG) { int[] tmp = new int[lastTerm.length]; System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length); - System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size()); + System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) + + " pending.size()=" + pending.size()); } */ @@ -1051,8 +1036,8 @@ private void pushTerm(BytesRef text) throws IOException { // we are closing: int prefixTopSize = pending.size() - prefixStarts[i]; if (prefixTopSize >= minItemsInBlock) { - // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " - // minItemsInBlock=" + minItemsInBlock); + // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + + // " minItemsInBlock=" + minItemsInBlock); writeBlocks(i + 1, prefixTopSize); prefixStarts[i] -= prefixTopSize - 1; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java index a930575756af..ef95f68a697d 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java @@ -230,24 +230,6 @@ public int size() { return fields.size(); } - // for debugging - String brToString(BytesRef b) { - if (b == null) { - return "null"; - } else { - try { - return b.utf8ToString() + " " + b; - } catch ( - @SuppressWarnings("unused") - Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - } - @Override public void checkIntegrity() throws IOException { // term dictionary diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java index a7ef5ef99329..fb6b0de56b7d 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java @@ -43,6 +43,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FSTCompiler; @@ -288,29 +289,10 @@ public PendingTerm(BytesRef term, BlockTermState state) { @Override public String toString() { - return brToString(termBytes); + return ToStringUtils.bytesRefToString(termBytes); } } - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - - // for debugging - @SuppressWarnings("unused") - static String brToString(byte[] b) { - return brToString(new BytesRef(b)); - } - private static final class SubIndex { public final FST index; public final long termOrdStart; @@ -353,7 +335,7 @@ public PendingBlock( @Override public String toString() { - return "BLOCK: " + brToString(prefix); + return "BLOCK: " + ToStringUtils.bytesRefToString(prefix); } public void compileIndex( @@ -457,9 +439,9 @@ private void append( Output newOutput = FST_OUTPUTS.newOutput( output.bytes, termOrdOffset + output.startOrd, output.endOrd - termOrdOffset); - // System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + " - // termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" + - // newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd)); + // System.out.println(" append sub=" + indexEnt.input + " output=" + indexEnt.output + + // " termOrdOffset=" + termOrdOffset + " blockTermCount=" + blockTermCount + " newOutput=" + // + newOutput + " endOrd=" + (termOrdOffset+Long.MAX_VALUE-output.endOrd)); fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), newOutput); } } @@ -642,8 +624,8 @@ private PendingBlock writeBlock( long startFP = out.getFilePointer(); - // if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + " - // floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + + // if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + + // " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + // hasTerms + " hasSubBlocks=" + hasSubBlocks); boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1; @@ -662,11 +644,11 @@ private PendingBlock writeBlock( out.writeVInt(code); // if (DEBUG) { - // System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " - // pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + - // brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? (" - // floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + - // isLastInFloor); + // System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + + // " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + + // ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP + + // (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + + // " isLastInFloor=" + isLastInFloor); // } final List subIndices; @@ -784,7 +766,8 @@ private PendingBlock writeBlock( BytesRef suffixBytes = new BytesRef(suffix); System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); suffixBytes.length = suffix; - System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); + System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) + + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); } */ @@ -842,7 +825,8 @@ public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throw if (DEBUG) { int[] tmp = new int[lastTerm.length]; System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length); - System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size()); + System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + + Arrays.toString(tmp) + " pending.size()=" + pending.size()); } */ @@ -885,8 +869,8 @@ private void pushTerm(BytesRef text) throws IOException { // we are closing: int prefixTopSize = pending.size() - prefixStarts[i]; if (prefixTopSize >= minItemsInBlock) { - // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " - // minItemsInBlock=" + minItemsInBlock); + // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + + // " minItemsInBlock=" + minItemsInBlock); writeBlocks(i + 1, prefixTopSize); prefixStarts[i] -= prefixTopSize - 1; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java index 2a95da52a1be..3f36bfeefb19 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnum.java @@ -59,7 +59,7 @@ public OrdsIntersectTermsEnum(OrdsFieldReader fr, CompiledAutomaton compiled, By throws IOException { // if (DEBUG) { // System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + - // brToString(compiled.commonSuffixRef)); + // ToStringUtils.bytesRefToString(compiled.commonSuffixRef)); // } this.fr = fr; runAutomaton = compiled.runAutomaton; @@ -280,13 +280,15 @@ private void seekToStartTerm(BytesRef target) throws IOException { currentFrame.loadNextFloorBlock(); continue; } else { - // if (DEBUG) System.out.println(" return term=" + brToString(term)); + // if (DEBUG) System.out.println(" return term=" + + // ToStringUtils.bytesRefToString(term)); return; } } continue; } else if (cmp == 0) { - // if (DEBUG) System.out.println(" return term=" + brToString(term)); + // if (DEBUG) System.out.println(" return term=" + + // ToStringUtils.bytesRefToString(term)); return; } else { // Fallback to prior entry: the semantics of @@ -324,10 +326,10 @@ public BytesRef next() throws IOException { // if (DEBUG) { // System.out.println("\nintEnum.next seg=" + segment); - // System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new - // BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " - // lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + - // (currentFrame.transitions.length == 0 ? "n/a" : + // System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + + // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + + // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + + // " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + // currentFrame.outputPrefix); // } @@ -340,9 +342,10 @@ public BytesRef next() throws IOException { // if (DEBUG) System.out.println(" next-floor-block"); currentFrame.loadNextFloorBlock(); // if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + - // brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + - // currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + - // currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : + // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, + // currentFrame.prefix)) + + // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + + // " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + // currentFrame.outputPrefix); } else { @@ -354,9 +357,10 @@ public BytesRef next() throws IOException { currentFrame = stack[currentFrame.ord - 1]; assert currentFrame.lastSubFP == lastFP; // if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + - // brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + - // currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + - // currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : + // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, + // currentFrame.prefix)) + + // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + + // " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + // currentFrame.outputPrefix); } @@ -370,7 +374,7 @@ public BytesRef next() throws IOException { // suffixRef.length = currentFrame.suffix; // System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " + // currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + - // brToString(suffixRef)); + // ToStringUtils.bytesRefToString(suffixRef)); // } if (currentFrame.suffix != 0) { @@ -480,15 +484,16 @@ public BytesRef next() throws IOException { copyTerm(); currentFrame = pushFrame(state); // if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + - // brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + - // currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + + // ToStringUtils.bytesRefToString(new BytesRef(term.bytes, term.offset, + // currentFrame.prefix)) + + // " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + // currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : // currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + // currentFrame.outputPrefix); } else if (runAutomaton.isAccept(state)) { copyTerm(); // if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" + - // brToString(term)); + // ToStringUtils.bytesRefToString(term)); assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0 : "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString(); return term; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java index d3107849ea16..5b6771e7856c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnum.java @@ -32,6 +32,7 @@ import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.Util; @@ -174,11 +175,11 @@ OrdsSegmentTermsEnumFrame pushFrame(FST.Arc arc, long fp, int length, lo throws IOException { final OrdsSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); f.arc = arc; - // System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp + " - // nextEnt=" + f.nextEnt); + // System.out.println("pushFrame termOrd= " + termOrd + " fpOrig=" + f.fpOrig + " fp=" + fp + + // " nextEnt=" + f.nextEnt); if (f.fpOrig == fp && f.nextEnt != -1) { - // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " - // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + + // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + + // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // term.length + " vs prefix=" + f.prefix); if (f.prefix > targetBeforeCurrentLength) { @@ -204,7 +205,7 @@ OrdsSegmentTermsEnumFrame pushFrame(FST.Arc arc, long fp, int length, lo // final int sav = term.length; // term.length = length; // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + - // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term)); + // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term)); // term.length = sav; // } } @@ -224,19 +225,6 @@ private boolean setEOF() { return true; } - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - @Override public boolean seekExact(final BytesRef target) throws IOException { @@ -250,7 +238,9 @@ public boolean seekExact(final BytesRef target) throws IOException { /* if (DEBUG) { - System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix); + System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + + ToStringUtils.bytesRefToString(target) + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists + + ") validIndexPrefix=" + validIndexPrefix); printSeekState(System.out); } */ @@ -411,8 +401,8 @@ public boolean seekExact(final BytesRef target) throws IOException { positioned = true; // if (DEBUG) { - // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " - // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + + // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength); // } @@ -443,7 +433,7 @@ public boolean seekExact(final BytesRef target) throws IOException { term.setByteAt(targetUpto, (byte) targetLabel); term.setLength(1 + targetUpto); // if (DEBUG) { - // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term)); // } return false; } @@ -459,7 +449,7 @@ public boolean seekExact(final BytesRef target) throws IOException { } else { // if (DEBUG) { // System.out.println(" got " + result + "; return NOT_FOUND term=" + - // brToString(term)); + // ToStringUtils.bytesRefToString(term)); // } return false; } @@ -502,7 +492,7 @@ public boolean seekExact(final BytesRef target) throws IOException { termExists = false; term.setLength(targetUpto); // if (DEBUG) { - // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term)); // } return false; } @@ -537,8 +527,8 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { // if (DEBUG) { // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + - // target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + - // termExists + ") validIndexPrefix= " + validIndexPrefix); + // target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) + + // " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix); // printSeekState(); // } @@ -581,9 +571,9 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); // if (DEBUG) { // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + - // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " - // vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + - // " output=" + output); + // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + + // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + // + " output=" + output); // } if (cmp != 0) { break; @@ -697,8 +687,8 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { positioned = true; // if (DEBUG) { - // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " - // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + + // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength); // } @@ -733,7 +723,8 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { if (next() != null) { // if (DEBUG) { - // System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term); + // System.out.println(" return NOT_FOUND term=" + + // ToStringUtils.bytesRefToString(term)); // } return SeekStatus.NOT_FOUND; } else { @@ -744,7 +735,8 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { } } else { // if (DEBUG) { - // System.out.println(" return " + result + " term=" + brToString(term) + " " + term); + // System.out.println(" return " + result + " term=" + + // ToStringUtils.bytesRefToString(term)); // } return result; } @@ -829,7 +821,7 @@ private void printSeekState(PrintStream out) throws IOException { + " prefixLen=" + f.prefix + " prefix=" - + brToString(prefix) + + ToStringUtils.bytesRefToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms @@ -859,7 +851,7 @@ private void printSeekState(PrintStream out) throws IOException { + " prefixLen=" + f.prefix + " prefix=" - + brToString(prefix) + + ToStringUtils.bytesRefToString(prefix) + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) @@ -951,8 +943,9 @@ public BytesRef next() throws IOException { assert !eof; // if (DEBUG) { - // System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " - // termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + + // System.out.println("\nBTTR.next seg=" + segment + " term=" + + // ToStringUtils.bytesRefToString(term) + + // " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); // printSeekState(); // } @@ -1019,8 +1012,8 @@ public BytesRef next() throws IOException { // currentFrame.hasTerms = true; currentFrame.loadBlock(); } else { - // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + " - // currentFrame.ord=" + currentFrame.ord); + // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + + // " currentFrame.ord=" + currentFrame.ord); positioned = true; return term.get(); } @@ -1235,8 +1228,8 @@ private InputOutput getByOutput(long targetOrd) throws IOException { int low = 0; int high = arc.numArcs() - 1; int mid = 0; - // System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " - // output=" + output); + // System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + + // " output=" + output); boolean found = false; while (low <= high) { mid = (low + high) >>> 1; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java index 9eca4d14b6b6..cd98a3e0f8cf 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java @@ -119,8 +119,8 @@ public void setFloorData(ByteArrayDataInput in, BytesRef source) { numFollowFloorBlocks = floorDataReader.readVInt(); nextFloorLabel = floorDataReader.readByte() & 0xff; nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong(); - // System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + " - // shift=" + (nextFloorTermOrd-termOrdOrig)); + // System.out.println(" setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + + // " shift=" + (nextFloorTermOrd-termOrdOrig)); // if (DEBUG) { // System.out.println(" setFloorData fpOrig=" + fpOrig + " bytes=" + new @@ -289,8 +289,8 @@ public boolean next() { // Decodes next entry; returns true if it's a sub-block public boolean nextLeaf() { - // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " - // entCount=" + entCount); + // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + + // " entCount=" + entCount); assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp + " termOrd=" + termOrd; nextEnt++; @@ -306,8 +306,8 @@ public boolean nextLeaf() { } public boolean nextNonLeaf() { - // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " - // entCount=" + entCount); + // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + + // " entCount=" + entCount); assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; nextEnt++; @@ -374,8 +374,8 @@ public void scanToFloorFrame(BytesRef target) { newFP = fpOrig + (code >>> 1); hasTerms = (code & 1) != 0; // if (DEBUG) { - // System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + " - // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); + // System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + + // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // } isLastInFloor = numFollowFloorBlocks == 1; @@ -440,8 +440,8 @@ public void scanToFloorFrame(long targetOrd) { newFP = fpOrig + (code >>> 1); hasTerms = (code & 1) != 0; // if (DEBUG) { - // System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + " - // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); + // System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + + // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // } isLastInFloor = numFollowFloorBlocks == 1; @@ -495,8 +495,8 @@ public void decodeMetaData() throws IOException { boolean absolute = metaDataUpto == 0; assert limit > 0 : "limit=" + limit + " isLeafBlock=" + isLeafBlock + " nextEnt=" + nextEnt; - // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " - // mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit); + // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + + // " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd + " limit=" + limit); // TODO: better API would be "jump straight to term=N"??? while (metaDataUpto < limit) { @@ -593,10 +593,10 @@ public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOExcept // scan the entries check if the suffix matches. public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { - // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " - // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + - // OrdsSegmentTermsEnum.brToString(target) + " term=" + - // OrdsSegmentTermsEnum.brToString(ste.term)); + // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + + // ToStringUtils.bytesRefToString(target) + " term=" + + // ToStringUtils.bytesRefToString(ste.term)); assert nextEnt != -1; @@ -627,7 +627,7 @@ public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOEx // suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.length = suffix; // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" - // + OrdsSegmentTermsEnum.brToString(suffixBytesRef)); + // + ToStringUtils.bytesRefToString(suffixBytesRef)); // } final int termLen = prefix + suffix; @@ -714,8 +714,8 @@ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws I // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + - // OrdsSegmentTermsEnum.brToString(target) + " term=" + - // OrdsSegmentTermsEnum.brToString(ste.term)); + // ToStringUtils.bytesRefToString(target) + " term=" + + // ToStringUtils.bytesRefToString(ste.term)); assert nextEnt != -1; @@ -743,7 +743,8 @@ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws I // suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.length = suffix; // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + - // (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); + // (nextEnt-1) + " (of " + entCount + ") suffix=" + + // ToStringUtils.bytesRefToString(suffixBytesRef)); // } ste.termExists = (code & 1) == 0; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/FieldReader.java index 658f486933ea..17987e4e878f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/FieldReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/FieldReader.java @@ -210,7 +210,7 @@ public int getDocCount() { @Override public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { // if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" + - // BlockTreeTermsWriter.brToString(startTerm)); + // ToStringUtils.bytesRefToString(startTerm)); // System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton); // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum? // can we optimize knowing that...? diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java index 2346ae892a35..7677aba175d0 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/IntersectTermsEnum.java @@ -549,19 +549,6 @@ private BytesRef _next() throws IOException { } } - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - private void copyTerm() { final int len = currentFrame.prefix + currentFrame.suffix; if (term.bytes.length < len) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java index 234ac53f792a..f8e99cfb7947 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java @@ -308,24 +308,6 @@ public int size() { return fieldMap.size(); } - // for debugging - String brToString(BytesRef b) { - if (b == null) { - return "null"; - } else { - try { - return b.utf8ToString() + " " + b; - } catch ( - @SuppressWarnings("unused") - Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - } - @Override public void checkIntegrity() throws IOException { // terms index diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java index 1cf045f9c145..90b34750463d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsWriter.java @@ -47,6 +47,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.compress.LZ4; import org.apache.lucene.util.compress.LowercaseAsciiCompression; import org.apache.lucene.util.fst.ByteSequenceOutputs; @@ -394,7 +395,7 @@ public void write(Fields fields, NormsProducer norms) throws IOException { } // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + - // brToString(term)); + // ToStringUtils.bytesRefToString(term)); termsWriter.write(term, termsEnum, norms); } @@ -433,33 +434,10 @@ public PendingTerm(BytesRef term, BlockTermState state) { @Override public String toString() { - return "TERM: " + brToString(termBytes); + return "TERM: " + ToStringUtils.bytesRefToString(termBytes); } } - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - if (b == null) { - return "(null)"; - } else { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - } - - // for debugging - @SuppressWarnings("unused") - static String brToString(byte[] b) { - return brToString(new BytesRef(b)); - } - /** * Encodes long value to variable length byte[], in MSB order. Use {@link * FieldReader#readMSBVLong} to decode. @@ -506,7 +484,7 @@ public PendingBlock( @Override public String toString() { - return "BLOCK: prefix=" + brToString(prefix); + return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix); } public void compileIndex( @@ -689,8 +667,8 @@ void writeBlocks(int prefixLength, int count) throws IOException { // if (DEBUG2) { // BytesRef br = new BytesRef(lastTerm.bytes()); // br.length = prefixLength; - // System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" - // + count); + // System.out.println("writeBlocks: seg=" + segment + " prefix=" + + // ToStringUtils.bytesRefToString(br) + " count=" + count); // } // Root block better write all remaining pending entries: @@ -843,9 +821,10 @@ private PendingBlock writeBlock( prefix.length = prefixLength; // if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + - // brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == - // pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + - // " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks); + // ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + + // " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel + + // " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + + // hasSubBlocks); // Write block header: int numEntries = end - start; @@ -858,7 +837,9 @@ private PendingBlock writeBlock( /* if (DEBUG) { - System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : "")); + System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) + + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : "")); } */ @@ -893,7 +874,8 @@ private PendingBlock writeBlock( // BytesRef suffixBytes = new BytesRef(suffix); // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); // suffixBytes.length = suffix; - // System.out.println(" write term suffix=" + brToString(suffixBytes)); + // System.out.println(" write term suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes)); // } // For leaf block we write suffix straight @@ -926,7 +908,8 @@ private PendingBlock writeBlock( // BytesRef suffixBytes = new BytesRef(suffix); // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); // suffixBytes.length = suffix; - // System.out.println(" write term suffix=" + brToString(suffixBytes)); + // System.out.println(" write term suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes)); // } // For non-leaf block we borrow 1 bit to record @@ -968,8 +951,9 @@ private PendingBlock writeBlock( // BytesRef suffixBytes = new BytesRef(suffix); // System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); // suffixBytes.length = suffix; - // System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " - // subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); + // System.out.println(" write sub-block suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + + // (startFP-block.fp) + " floor=" + block.isFloor); // } assert floorLeadLabel == -1 @@ -1090,7 +1074,8 @@ public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throw if (DEBUG) { int[] tmp = new int[lastTerm.length]; System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length); - System.out.println("BTTW: write term=" + brToString(text) + " prefixStarts=" + Arrays.toString(tmp) + " pending.size()=" + pending.size()); + System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) + + " pending.size()=" + pending.size()); } */ @@ -1143,8 +1128,8 @@ private void pushTerm(BytesRef text) throws IOException { // we are closing: int prefixTopSize = pending.size() - prefixStarts[i]; if (prefixTopSize >= minItemsInBlock) { - // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " - // minItemsInBlock=" + minItemsInBlock); + // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + + // " minItemsInBlock=" + minItemsInBlock); writeBlocks(i + 1, prefixTopSize); prefixStarts[i] -= prefixTopSize - 1; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java index f8e3c50bcb2a..479736099ef2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnum.java @@ -263,8 +263,8 @@ SegmentTermsEnumFrame pushFrame(FST.Arc arc, long fp, int length) thro final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); f.arc = arc; if (f.fpOrig == fp && f.nextEnt != -1) { - // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " - // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + + // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + + // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // term.length + " vs prefix=" + f.prefix); // if (f.prefix > targetBeforeCurrentLength) { @@ -286,7 +286,7 @@ SegmentTermsEnumFrame pushFrame(FST.Arc arc, long fp, int length) thro // final int sav = term.length; // term.length = length; // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + - // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term)); + // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term)); // term.length = sav; // } } @@ -306,27 +306,6 @@ private boolean setEOF() { return true; } - /* - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRefBuilder b) { - return brToString(b.get()); - } - */ - @Override public boolean seekExact(BytesRef target) throws IOException { @@ -344,8 +323,9 @@ public boolean seekExact(BytesRef target) throws IOException { // if (DEBUG) { // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + - // fr.fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" - // + termExists + ") validIndexPrefix=" + validIndexPrefix); + // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" + + // ToStringUtils.bytesRefToString(term) + + // " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix); // printSeekState(System.out); // } @@ -499,8 +479,8 @@ public boolean seekExact(BytesRef target) throws IOException { } // if (DEBUG) { - // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " - // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + + // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength); // } @@ -531,7 +511,7 @@ public boolean seekExact(BytesRef target) throws IOException { term.setByteAt(targetUpto, (byte) targetLabel); term.setLength(1 + targetUpto); // if (DEBUG) { - // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term)); // } return false; } @@ -547,7 +527,7 @@ public boolean seekExact(BytesRef target) throws IOException { } else { // if (DEBUG) { // System.out.println(" got " + result + "; return NOT_FOUND term=" + - // brToString(term)); + // ToStringUtils.bytesRefToString(term)); // } return false; } @@ -586,7 +566,7 @@ public boolean seekExact(BytesRef target) throws IOException { termExists = false; term.setLength(targetUpto); // if (DEBUG) { - // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term)); // } return false; } @@ -622,8 +602,9 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { // if (DEBUG) { // System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + - // fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term) - // + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix); + // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " current=" + + // ToStringUtils.bytesRefToString(term) + " (exists?=" + termExists + + // ") validIndexPrefix= " + validIndexPrefix); // printSeekState(System.out); // } @@ -663,9 +644,9 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); // if (DEBUG) { // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + - // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " - // vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + - // " output=" + output); + // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + + // " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + // + " output=" + output); // } if (cmp != 0) { break; @@ -771,8 +752,8 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { } // if (DEBUG) { - // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " - // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + + // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength); // } @@ -808,7 +789,8 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { if (next() != null) { // if (DEBUG) { - // System.out.println(" return NOT_FOUND term=" + brToString(term)); + // System.out.println(" return NOT_FOUND term=" + + // ToStringUtils.bytesRefToString(term)); // } return SeekStatus.NOT_FOUND; } else { @@ -819,7 +801,8 @@ public SeekStatus seekCeil(BytesRef target) throws IOException { } } else { // if (DEBUG) { - // System.out.println(" return " + result + " term=" + brToString(term)); + // System.out.println(" return " + result + " term=" + + // ToStringUtils.bytesRefToString(term)); // } return result; } @@ -1015,9 +998,10 @@ public BytesRef next() throws IOException { assert !eof; // if (DEBUG) { - // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + brToString(term) + " - // termExists?=" + termExists + " field=" + fr.fieldInfo.name + " termBlockOrd=" + - // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); + // System.out.println("\nBTTR.next seg=" + fr.parent.segment + " term=" + + // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + " field=" + + // fr.fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + + // " validIndexPrefix=" + validIndexPrefix); // printSeekState(System.out); // } @@ -1081,8 +1065,8 @@ public BytesRef next() throws IOException { // try to scan to the right floor frame: currentFrame.loadBlock(); } else { - // if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord=" - // + currentFrame.ord); + // if (DEBUG) System.out.println(" return term=" + ToStringUtils.bytesRefToString(term) + + // " currentFrame.ord=" + currentFrame.ord); return term.get(); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnumFrame.java index 4016b5c784d1..66231313e520 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnumFrame.java @@ -295,8 +295,8 @@ public boolean next() throws IOException { } public void nextLeaf() { - // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " - // entCount=" + entCount); + // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + + // " entCount=" + entCount); assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; nextEnt++; @@ -388,8 +388,8 @@ public void scanToFloorFrame(BytesRef target) { newFP = fpOrig + (code >>> 1); hasTerms = (code & 1) != 0; // if (DEBUG) { - // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + " - // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); + // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + + // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // } isLastInFloor = numFollowFloorBlocks == 1; @@ -531,28 +531,14 @@ public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOExcept private long subCode; CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION; - // for debugging - /* - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - */ - // Target's prefix matches this block's prefix; we // scan the entries check if the suffix matches. public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { - // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " - // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + - // brToString(term)); + // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + + // ToStringUtils.bytesRefToString(target) + + // " term=" + ToStringUtils.bytesRefToString(term)); assert nextEnt != -1; @@ -582,7 +568,7 @@ public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOEx // suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.length = suffix; // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" - // + brToString(suffixBytesRef)); + // + ToStringUtils.bytesRefToString(suffixBytesRef)); // } startBytePos = suffixesReader.getPosition(); @@ -647,8 +633,9 @@ public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOEx public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException { // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + - // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + - // brToString(target)); + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + + // ToStringUtils.bytesRefToString(target) + + // " term=" + ToStringUtils.bytesRefToString(term)); assert nextEnt != -1; @@ -676,7 +663,8 @@ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws I // suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.length = suffix; // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + - // (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); + // (nextEnt-1) + " (of " + entCount + ") suffix=" + + // ToStringUtils.bytesRefToString(suffixBytesRef)); // } final int termLen = prefix + suffix; @@ -708,8 +696,8 @@ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws I // return NOT_FOUND: fillTerm(); - // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " - // ste.termExists=" + ste.termExists); + // if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + + // " ste.termExists=" + ste.termExists); if (!exactOnly && !ste.termExists) { // System.out.println(" now pushFrame"); diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java index 1fbdaf95412d..351259e87b51 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java @@ -130,17 +130,20 @@ public boolean equals(Object other) { return false; } - /** Interprets stored bytes as UTF8 bytes, returning the resulting string */ + /** + * Interprets stored bytes as UTF-8 bytes, returning the resulting string. May throw an {@link + * AssertionError} or a {@link RuntimeException} if the data is not well-formed UTF-8. + */ public String utf8ToString() { final char[] ref = new char[length]; final int len = UnicodeUtil.UTF8toUTF16(bytes, offset, length, ref); return new String(ref, 0, len); } - /** Returns hex encoded bytes, eg [0x6c 0x75 0x63 0x65 0x6e 0x65] */ + /** Returns hex encoded bytes, e.g. "[6c 75 63 65 6e 65]" */ @Override public String toString() { - StringBuilder sb = new StringBuilder(); + StringBuilder sb = new StringBuilder(2 + 3 * length); sb.append('['); final int end = offset + length; for (int i = offset; i < end; i++) { diff --git a/lucene/core/src/java/org/apache/lucene/util/ToStringUtils.java b/lucene/core/src/java/org/apache/lucene/util/ToStringUtils.java index 41647507794f..0f079e9a8250 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ToStringUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/ToStringUtils.java @@ -32,6 +32,10 @@ public static void byteArray(StringBuilder buffer, byte[] bytes) { private static final char[] HEX = "0123456789abcdef".toCharArray(); + /** + * Unlike {@link Long#toHexString(long)} returns a String with a "0x" prefix and all the leading + * zeros. + */ public static String longHex(long x) { char[] asHex = new char[16]; for (int i = 16; --i >= 0; x >>>= 4) { @@ -39,4 +43,31 @@ public static String longHex(long x) { } return "0x" + new String(asHex); } + + /** + * Builds a String with both textual representation of the {@link BytesRef} data and the bytes hex + * values. For example: {@code "hello [68 65 6c 6c 6f]"}. If the content is not a valid UTF-8 + * sequence, only the bytes hex values are returned, as per {@link BytesRef#toString()}. + */ + @SuppressWarnings("unused") + public static String bytesRefToString(BytesRef b) { + if (b == null) { + return "null"; + } + try { + return b.utf8ToString() + " " + b; + } catch (AssertionError | RuntimeException t) { + // If BytesRef isn't actually UTF-8, or it's e.g. a prefix of UTF-8 + // that ends mid-unicode-char, we fall back to hex: + return b.toString(); + } + } + + public static String bytesRefToString(BytesRefBuilder b) { + return bytesRefToString(b.get()); + } + + public static String bytesRefToString(byte[] b) { + return bytesRefToString(new BytesRef(b)); + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java index 163060b39430..ea2cc686b6b0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnum.java @@ -30,6 +30,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.PairOutputs.Pair; import org.apache.lucene.util.fst.Util; @@ -175,8 +176,8 @@ IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc> arc, long final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord); f.arc = arc; if (f.fpOrig == fp && f.nextEnt != -1) { - // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " - // isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + + // if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + + // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + // term.length + " vs prefix=" + f.prefix); if (f.prefix > targetBeforeCurrentLength) { @@ -197,7 +198,7 @@ IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc> arc, long // final int sav = term.length; // term.length = length; // System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + - // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term)); + // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term)); // term.length = sav; // } } @@ -222,19 +223,6 @@ public boolean seekExact(final BytesRef target) throws IOException { return seekExact(target, 0); } - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - /** Get the version of the currently seek'd term; only valid if we are positioned. */ public long getVersion() { return ((IDVersionTermState) currentFrame.state).idVersion; @@ -258,8 +246,9 @@ public boolean seekExact(final BytesRef target, long minIDVersion) throws IOExce // if (DEBUG) { // System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" + - // fr.fieldInfo.name + ":" + brToString(target) + " minIDVersion=" + minIDVersion + " current=" - // + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix); + // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" + + // minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" + + // termExists + ") validIndexPrefix=" + validIndexPrefix); // printSeekState(System.out); // } @@ -460,8 +449,8 @@ public boolean seekExact(final BytesRef target, long minIDVersion) throws IOExce } // if (DEBUG) { - // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " - // currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + + // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength + " termExists=" + termExists); // } @@ -492,7 +481,7 @@ public boolean seekExact(final BytesRef target, long minIDVersion) throws IOExce term.setByteAt(targetUpto, (byte) targetLabel); term.setLength(1 + targetUpto); // if (DEBUG) { - // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term)); // } return false; } @@ -520,10 +509,11 @@ public boolean seekExact(final BytesRef target, long minIDVersion) throws IOExce // termExists = false; // } // if (DEBUG) { - // System.out.println(" FAST version NOT_FOUND term=" + brToString(term) + " - // targetUpto=" + targetUpto + " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + - // " validIndexPrefix=" + validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + - // currentFrame.fp + " termExists=" + termExists); + // System.out.println(" FAST version NOT_FOUND term=" + + // ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto + + // " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" + + // validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp + + // " termExists=" + termExists); // } return false; } @@ -553,7 +543,7 @@ public boolean seekExact(final BytesRef target, long minIDVersion) throws IOExce } else { // if (DEBUG) { // System.out.println(" got " + result + "; return NOT_FOUND term=" + - // brToString(term)); + // ToStringUtils.bytesRefToString(term)); // } return false; } @@ -604,7 +594,7 @@ public boolean seekExact(final BytesRef target, long minIDVersion) throws IOExce termExists = false; term.setLength(targetUpto); // if (DEBUG) { - // System.out.println(" FAST NOT_FOUND term=" + brToString(term)); + // System.out.println(" FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term)); // } return false; } @@ -656,8 +646,8 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { // if (DEBUG) { // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + - // target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + - // termExists + ") validIndexPrefix= " + validIndexPrefix); + // target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) + + // " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix); // printSeekState(); // } @@ -700,9 +690,9 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF); // if (DEBUG) { // System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + - // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " - // vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + - // " output=" + output); + // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + + // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + // + " output=" + output); // } if (cmp != 0) { break; @@ -814,8 +804,8 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { } // if (DEBUG) { - // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " - // currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + + // System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + + // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + // targetBeforeCurrentLength); // } @@ -850,7 +840,8 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { if (next() != null) { // if (DEBUG) { - // System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term); + // System.out.println(" return NOT_FOUND term=" + + // ToStringUtils.bytesRefToString(term)); // } return SeekStatus.NOT_FOUND; } else { @@ -861,7 +852,8 @@ public SeekStatus seekCeil(final BytesRef target) throws IOException { } } else { // if (DEBUG) { - // System.out.println(" return " + result + " term=" + brToString(term) + " " + term); + // System.out.println(" return " + result + " term=" + + // ToStringUtils.bytesRefToString(term)); // } return result; } @@ -946,7 +938,7 @@ private void printSeekState(PrintStream out) throws IOException { + " prefixLen=" + f.prefix + " prefix=" - + brToString(prefix) + + ToStringUtils.bytesRefToString(prefix) + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms @@ -974,7 +966,7 @@ private void printSeekState(PrintStream out) throws IOException { + " prefixLen=" + f.prefix + " prefix=" - + brToString(prefix) + + ToStringUtils.bytesRefToString(prefix) + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) @@ -1063,9 +1055,10 @@ public BytesRef next() throws IOException { assert !eof; // if (DEBUG) { - // System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " - // termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + - // currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix); + // System.out.println("\nBTTR.next seg=" + segment + " term=" + + // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists + + // " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + + // " validIndexPrefix=" + validIndexPrefix); // printSeekState(); // } @@ -1129,8 +1122,8 @@ public BytesRef next() throws IOException { // currentFrame.hasTerms = true; currentFrame.loadBlock(); } else { - // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + " - // currentFrame.ord=" + currentFrame.ord); + // if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + + // " currentFrame.ord=" + currentFrame.ord); return term.get(); } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java index 9f75b23519b1..4ecac0a93adc 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/IDVersionSegmentTermsEnumFrame.java @@ -257,8 +257,8 @@ public boolean next() { // Decodes next entry; returns true if it's a sub-block public boolean nextLeaf() { - // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " - // entCount=" + entCount); + // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + + // " entCount=" + entCount); assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; nextEnt++; @@ -273,8 +273,8 @@ public boolean nextLeaf() { } public boolean nextNonLeaf() { - // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " - // entCount=" + entCount); + // if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + + // " entCount=" + entCount); assert nextEnt != -1 && nextEnt < entCount : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp; nextEnt++; @@ -338,8 +338,8 @@ public void scanToFloorFrame(BytesRef target) { newFP = fpOrig + (code >>> 1); hasTerms = (code & 1) != 0; // if (DEBUG) { - // System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + " - // hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); + // System.out.println(" label=" + ((char) nextFloorLabel) + " fp=" + newFP + + // " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks); // } isLastInFloor = numFollowFloorBlocks == 1; @@ -379,8 +379,8 @@ public void scanToFloorFrame(BytesRef target) { public void decodeMetaData() throws IOException { - // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + " - // mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd); + // if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + ste.fr.parent.segment + + // " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd); assert nextEnt >= 0; @@ -473,10 +473,10 @@ public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOExcept // scan the entries check if the suffix matches. public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException { - // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " - // nextEnt=" + nextEnt + " (of " + entCount + ") target=" + - // IDVersionSegmentTermsEnum.brToString(target) + " term=" + - // IDVersionSegmentTermsEnum.brToString(ste.term)); + // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + + // ToStringUtils.bytesRefToString(target) + + // " term=" + ToStringUtils.bytesRefToString(ste.term)); assert nextEnt != -1; @@ -506,7 +506,7 @@ public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOEx // suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.length = suffix; // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" - // + IDVersionSegmentTermsEnum.brToString(suffixBytesRef)); + // + ToStringUtils.bytesRefToString(suffixBytesRef)); // } final int termLen = prefix + suffix; @@ -606,8 +606,8 @@ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws I // if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + // " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + - // IDVersionSegmentTermsEnum.brToString(target) + " term=" + - // IDVersionSegmentTermsEnum.brToString(ste.term)); + // ToStringUtils.bytesRefToString(target) + " term=" + + // ToStringUtils.bytesRefToString(ste.term)); assert nextEnt != -1; @@ -635,7 +635,8 @@ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws I // suffixBytesRef.offset = suffixesReader.getPosition(); // suffixBytesRef.length = suffix; // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + - // (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef)); + // (nextEnt-1) + " (of " + entCount + ") suffix=" + + // ToStringUtils.bytesRefToString(suffixBytesRef)); // } ste.termExists = (code & 1) == 0; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsReader.java index 7fabfe3eba5b..12157ca652fe 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsReader.java @@ -237,24 +237,6 @@ public int size() { return fields.size(); } - // for debugging - String brToString(BytesRef b) { - if (b == null) { - return "null"; - } else { - try { - return b.utf8ToString() + " " + b; - } catch ( - @SuppressWarnings("unused") - Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - } - @Override public void checkIntegrity() throws IOException { // term dictionary diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java index 0e9c2332558b..b0abc0b5059f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/idversion/VersionBlockTreeTermsWriter.java @@ -42,6 +42,7 @@ import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.fst.ByteSequenceOutputs; import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.FST; @@ -292,29 +293,10 @@ public PendingTerm(BytesRef term, BlockTermState state) { @Override public String toString() { - return brToString(termBytes); + return ToStringUtils.bytesRefToString(termBytes); } } - // for debugging - @SuppressWarnings("unused") - static String brToString(BytesRef b) { - try { - return b.utf8ToString() + " " + b; - } catch (Throwable t) { - // If BytesRef isn't actually UTF8, or it's eg a - // prefix of UTF8 that ends mid-unicode-char, we - // fallback to hex: - return b.toString(); - } - } - - // for debugging - @SuppressWarnings("unused") - static String brToString(byte[] b) { - return brToString(new BytesRef(b)); - } - private static final class PendingBlock extends PendingEntry { public final BytesRef prefix; public final long fp; @@ -347,7 +329,7 @@ public PendingBlock( @Override public String toString() { - return "BLOCK: " + brToString(prefix); + return "BLOCK: " + ToStringUtils.bytesRefToString(prefix); } public void compileIndex( @@ -610,8 +592,8 @@ private PendingBlock writeBlock( long startFP = out.getFilePointer(); - // if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + " - // floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + + // if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + + // " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + // hasTerms + " hasSubBlocks=" + hasSubBlocks); boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1; @@ -630,11 +612,11 @@ private PendingBlock writeBlock( out.writeVInt(code); // if (DEBUG) { - // System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " - // pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + - // brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? (" - // floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + - // isLastInFloor); + // System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + + // " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + + // ToStringUtils.bytesRefToString(prefix) + " entCount=" + length + " startFP=" + startFP + + // (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + + // " isLastInFloor=" + isLastInFloor); // } // 1st pass: pack term suffix bytes into byte[] blob @@ -737,7 +719,8 @@ private PendingBlock writeBlock( BytesRef suffixBytes = new BytesRef(suffix); System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); suffixBytes.length = suffix; - System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); + System.out.println(" write sub-block suffix=" + ToStringUtils.bytesRefToString(suffixBytes) + + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor); } */ @@ -824,8 +807,8 @@ private void pushTerm(BytesRef text) throws IOException { // we are closing: int prefixTopSize = pending.size() - prefixStarts[i]; if (prefixTopSize >= minItemsInBlock) { - // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " - // minItemsInBlock=" + minItemsInBlock); + // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + + // " minItemsInBlock=" + minItemsInBlock); writeBlocks(i + 1, prefixTopSize); prefixStarts[i] -= prefixTopSize - 1; }