From 7c529ce092dca15ed5ad89323852f02a493b1ab0 Mon Sep 17 00:00:00 2001 From: Patrick Zhai Date: Tue, 10 Sep 2024 17:35:37 -0700 Subject: [PATCH] Implement Accountable for NFARunAutomaton (#13741) --- lucene/CHANGES.txt | 2 ++ .../util/automaton/CompiledAutomaton.java | 2 ++ .../util/automaton/NFARunAutomaton.java | 33 +++++++++++++++++-- .../lucene/util/automaton/Transition.java | 13 +++++++- .../util/automaton/TestNFARunAutomaton.java | 11 +++++++ 5 files changed, 57 insertions(+), 4 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b9173f8756f4..bec62a1474c8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -159,6 +159,8 @@ New Features searcher is created via LuceneTestCase#newSearcher. Users may override IndexSearcher#slices(List) to optionally create slices that target segment partitions. (Luca Cavanna) +* GITHUB#13741: Implement Accountable for NFARunAutomaton, fix hashCode implementation of CompiledAutomaton. (Patrick Zhai) + Improvements --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java index c10ff4f28de8..22ea583ee08c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java @@ -510,6 +510,7 @@ public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((runAutomaton == null) ? 0 : runAutomaton.hashCode()); + result = prime * result + ((nfaRunAutomaton == null) ? 0 : nfaRunAutomaton.hashCode()); result = prime * result + ((term == null) ? 0 : term.hashCode()); result = prime * result + ((type == null) ? 0 : type.hashCode()); return result; @@ -538,6 +539,7 @@ public long ramBytesUsed() { + RamUsageEstimator.sizeOfObject(automaton) + RamUsageEstimator.sizeOfObject(commonSuffixRef) + RamUsageEstimator.sizeOfObject(runAutomaton) + + RamUsageEstimator.sizeOfObject(nfaRunAutomaton) + RamUsageEstimator.sizeOfObject(term) + RamUsageEstimator.sizeOfObject(transition); } diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/NFARunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/NFARunAutomaton.java index 9737098b37c2..7180e37d7051 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/NFARunAutomaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/NFARunAutomaton.java @@ -21,7 +21,9 @@ import java.util.HashMap; import java.util.Map; import org.apache.lucene.internal.hppc.BitMixer; +import org.apache.lucene.util.Accountable; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; /** * A RunAutomaton that does not require DFA. It will lazily determinize on-demand, memorizing the @@ -31,13 +33,16 @@ * * @lucene.internal */ -public class NFARunAutomaton implements ByteRunnable, TransitionAccessor { +public class NFARunAutomaton implements ByteRunnable, TransitionAccessor, Accountable { /** state ordinal of "no such state" */ - public static final int MISSING = -1; + private static final int MISSING = -1; private static final int NOT_COMPUTED = -2; + private static final long BASE_RAM_BYTES = + RamUsageEstimator.shallowSizeOfInstance(NFARunAutomaton.class); + private final Automaton automaton; private final int[] points; private final Map dStateToOrd = new HashMap<>(); // could init lazily? @@ -229,7 +234,17 @@ public void getTransition(int state, int index, Transition t) { setTransitionAccordingly(t); } - private class DState { + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES + + RamUsageEstimator.sizeOfObject(automaton) + + RamUsageEstimator.sizeOfObject(points) + + RamUsageEstimator.sizeOfMap(dStateToOrd) + + RamUsageEstimator.sizeOfObject(dStates) + + RamUsageEstimator.sizeOfObject(classmap); + } + + private class DState implements Accountable { private final int[] nfaStates; // this field is lazily init'd when first time caller wants to add a new transition private int[] transitions; @@ -426,5 +441,17 @@ public boolean equals(Object o) { DState dState = (DState) o; return hashCode == dState.hashCode && Arrays.equals(nfaStates, dState.nfaStates); } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.alignObjectSize( + Integer.BYTES * 3 + + 1 + + Transition.BYTES_USED * 2 + + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + + RamUsageEstimator.NUM_BYTES_OBJECT_REF * 4L) + + RamUsageEstimator.sizeOfObject(nfaStates) + + RamUsageEstimator.sizeOfObject(transitions); + } } } diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Transition.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Transition.java index a9e60b317607..c822a5028ead 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Transition.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Transition.java @@ -16,12 +16,18 @@ */ package org.apache.lucene.util.automaton; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; + /** * Holds one transition from an {@link Automaton}. This is typically used temporarily when iterating * through transitions by invoking {@link Automaton#initTransition} and {@link * Automaton#getNextTransition}. */ -public class Transition { +public class Transition implements Accountable { + + /** static estimation of bytes used */ + public static final long BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Transition.class); /** Sole constructor. */ public Transition() {} @@ -48,4 +54,9 @@ public Transition() {} public String toString() { return source + " --> " + dest + " " + (char) min + "-" + (char) max; } + + @Override + public long ramBytesUsed() { + return BYTES_USED; + } } diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestNFARunAutomaton.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestNFARunAutomaton.java index 3ae55ac46d2f..c577c1de8de7 100644 --- a/lucene/core/src/test/org/apache/lucene/util/automaton/TestNFARunAutomaton.java +++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestNFARunAutomaton.java @@ -32,14 +32,25 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.RamUsageTester; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.automaton.AutomatonTestUtil; import org.apache.lucene.util.IntsRef; +import org.junit.Assert; public class TestNFARunAutomaton extends LuceneTestCase { private static final String FIELD = "field"; + public void testRamUsageEstimation() { + RegExp regExp = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE); + Automaton nfa = regExp.toAutomaton(); + NFARunAutomaton runAutomaton = new NFARunAutomaton(nfa); + long estimation = runAutomaton.ramBytesUsed(); + long actual = RamUsageTester.ramUsed(runAutomaton); + Assert.assertEquals((double) actual, (double) estimation, (double) actual * 0.3); + } + @SuppressWarnings("unused") public void testWithRandomRegex() { for (int i = 0; i < 100; i++) {