Skip to content

Commit

Permalink
Implement Accountable for NFARunAutomaton (apache#13741)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaih authored Sep 11, 2024
1 parent e4efae6 commit 7c529ce
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 4 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ New Features
searcher is created via LuceneTestCase#newSearcher. Users may override IndexSearcher#slices(List) to optionally
create slices that target segment partitions. (Luca Cavanna)

* GITHUB#13741: Implement Accountable for NFARunAutomaton, fix hashCode implementation of CompiledAutomaton. (Patrick Zhai)

Improvements
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,7 @@ public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((runAutomaton == null) ? 0 : runAutomaton.hashCode());
result = prime * result + ((nfaRunAutomaton == null) ? 0 : nfaRunAutomaton.hashCode());
result = prime * result + ((term == null) ? 0 : term.hashCode());
result = prime * result + ((type == null) ? 0 : type.hashCode());
return result;
Expand Down Expand Up @@ -538,6 +539,7 @@ public long ramBytesUsed() {
+ RamUsageEstimator.sizeOfObject(automaton)
+ RamUsageEstimator.sizeOfObject(commonSuffixRef)
+ RamUsageEstimator.sizeOfObject(runAutomaton)
+ RamUsageEstimator.sizeOfObject(nfaRunAutomaton)
+ RamUsageEstimator.sizeOfObject(term)
+ RamUsageEstimator.sizeOfObject(transition);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.internal.hppc.BitMixer;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;

/**
* A RunAutomaton that does not require DFA. It will lazily determinize on-demand, memorizing the
Expand All @@ -31,13 +33,16 @@
*
* @lucene.internal
*/
public class NFARunAutomaton implements ByteRunnable, TransitionAccessor {
public class NFARunAutomaton implements ByteRunnable, TransitionAccessor, Accountable {

/** state ordinal of "no such state" */
public static final int MISSING = -1;
private static final int MISSING = -1;

private static final int NOT_COMPUTED = -2;

private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(NFARunAutomaton.class);

private final Automaton automaton;
private final int[] points;
private final Map<DState, Integer> dStateToOrd = new HashMap<>(); // could init lazily?
Expand Down Expand Up @@ -229,7 +234,17 @@ public void getTransition(int state, int index, Transition t) {
setTransitionAccordingly(t);
}

private class DState {
/**
 * Estimates the memory held by this run automaton: the shallow instance size plus the estimated
 * sizes of the wrapped automaton, the points array, the DState-to-ordinal map, the DState table
 * and the class map.
 *
 * @return estimated number of bytes used
 */
@Override
public long ramBytesUsed() {
  long total = BASE_RAM_BYTES;
  total += RamUsageEstimator.sizeOfObject(automaton);
  total += RamUsageEstimator.sizeOfObject(points);
  total += RamUsageEstimator.sizeOfMap(dStateToOrd);
  total += RamUsageEstimator.sizeOfObject(dStates);
  total += RamUsageEstimator.sizeOfObject(classmap);
  return total;
}

private class DState implements Accountable {
private final int[] nfaStates;
// this field is lazily initialized the first time a caller wants to add a new transition
private int[] transitions;
Expand Down Expand Up @@ -426,5 +441,17 @@ public boolean equals(Object o) {
DState dState = (DState) o;
return hashCode == dState.hashCode && Arrays.equals(nfaStates, dState.nfaStates);
}

/**
 * Estimates the memory held by this DState: a hand-computed, alignment-rounded shallow size plus
 * the estimated sizes of the {@code nfaStates} and {@code transitions} arrays.
 *
 * @return estimated number of bytes used
 */
@Override
public long ramBytesUsed() {
  // Hand-written shallow size; presumably mirrors this class's field layout -- TODO confirm
  // against the field declarations whenever fields are added or removed.
  final long shallowBytes =
      Integer.BYTES * 3
          + 1
          + Transition.BYTES_USED * 2
          + RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
          + RamUsageEstimator.NUM_BYTES_OBJECT_REF * 4L;
  final long alignedShallowBytes = RamUsageEstimator.alignObjectSize(shallowBytes);
  final long nfaStatesBytes = RamUsageEstimator.sizeOfObject(nfaStates);
  final long transitionsBytes = RamUsageEstimator.sizeOfObject(transitions);
  return alignedShallowBytes + nfaStatesBytes + transitionsBytes;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,18 @@
*/
package org.apache.lucene.util.automaton;

import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

/**
* Holds one transition from an {@link Automaton}. This is typically used temporarily when iterating
* through transitions by invoking {@link Automaton#initTransition} and {@link
* Automaton#getNextTransition}.
*/
public class Transition {
public class Transition implements Accountable {

/** static estimation of bytes used */
public static final long BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Transition.class);

/** Sole constructor. */
public Transition() {}
Expand All @@ -48,4 +54,9 @@ public Transition() {}
public String toString() {
  // Renders as "<source> --> <dest> <min>-<max>", with min and max cast to char.
  final StringBuilder text = new StringBuilder();
  text.append(source).append(" --> ").append(dest);
  text.append(" ").append((char) min);
  text.append("-").append((char) max);
  return text.toString();
}

/**
 * Reports the static per-instance estimate stored in {@link #BYTES_USED}.
 *
 * @return estimated number of bytes used by a transition
 */
@Override
public long ramBytesUsed() {
  final long estimate = BYTES_USED;
  return estimate;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,25 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.RamUsageTester;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.tests.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.IntsRef;
import org.junit.Assert;

public class TestNFARunAutomaton extends LuceneTestCase {

private static final String FIELD = "field";

/**
 * Builds an {@code NFARunAutomaton} from a random regular expression and checks that its
 * self-reported {@code ramBytesUsed()} is within 30% of the size measured by {@code
 * RamUsageTester}.
 */
public void testRamUsageEstimation() {
  String pattern = AutomatonTestUtil.randomRegexp(random());
  Automaton nfa = new RegExp(pattern, RegExp.NONE).toAutomaton();
  NFARunAutomaton underTest = new NFARunAutomaton(nfa);

  long estimated = underTest.ramBytesUsed();
  long measured = RamUsageTester.ramUsed(underTest);

  // The estimate may deviate from the measured size by at most 30%.
  double tolerance = (double) measured * 0.3;
  Assert.assertEquals((double) measured, (double) estimated, tolerance);
}

@SuppressWarnings("unused")
public void testWithRandomRegex() {
for (int i = 0; i < 100; i++) {
Expand Down

0 comments on commit 7c529ce

Please sign in to comment.