From db63c0ca8ae770c3f123492174bb9830c0d3ca1e Mon Sep 17 00:00:00 2001 From: dtspence <33552925+dtspence@users.noreply.github.com> Date: Fri, 4 Oct 2024 02:15:10 +0000 Subject: [PATCH 1/6] Field configuration helper cache implementation --- warehouse/ingest-core/pom.xml | 15 +++ .../data/config/CachedFieldConfigHelper.java | 75 ++++++++++++++ .../data/config/ingest/BaseIngestHelper.java | 15 ++- .../config/CachingFieldConfigHelperTest.java | 99 +++++++++++++++++++ 4 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java create mode 100644 warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java diff --git a/warehouse/ingest-core/pom.xml b/warehouse/ingest-core/pom.xml index 93c4b6b61f1..f5ca2dd1117 100644 --- a/warehouse/ingest-core/pom.xml +++ b/warehouse/ingest-core/pom.xml @@ -212,6 +212,21 @@ javassist test + + org.junit.jupiter + junit-jupiter-api + test + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.junit.jupiter + junit-jupiter-params + test + diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java new file mode 100644 index 00000000000..48f3c4b7e38 --- /dev/null +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java @@ -0,0 +1,75 @@ +package datawave.ingest.data.config; + +import java.util.EnumMap; +import java.util.Map; +import java.util.function.Function; + +import org.apache.commons.collections4.map.LRUMap; + +import com.google.common.annotations.VisibleForTesting; + +public class CachedFieldConfigHelper implements FieldConfigHelper { + private final FieldConfigHelper underlyingHelper; + private final Map resultCache; + + enum AttributeType { + INDEXED_FIELD, REVERSE_INDEXED_FIELD, TOKENIZED_FIELD, REVERSE_TOKENIZED_FIELD, STORED_FIELD, INDEXED_ONLY + } + + public CachedFieldConfigHelper(FieldConfigHelper helper, int limit) { + if (limit < 1) { + throw new IllegalArgumentException("Limit must be a positive integer"); + } + this.underlyingHelper = helper; + this.resultCache = new LRUMap<>(limit); + } + + @Override + public boolean isStoredField(String fieldName) { + return getOrEvaluate(AttributeType.STORED_FIELD, fieldName, underlyingHelper::isStoredField); + } + + @Override + public boolean isIndexedField(String fieldName) { + return getOrEvaluate(AttributeType.INDEXED_FIELD, fieldName, underlyingHelper::isIndexedField); + } + + @Override + public boolean isIndexOnlyField(String fieldName) { + return getOrEvaluate(AttributeType.INDEXED_ONLY, fieldName, underlyingHelper::isIndexOnlyField); + } + + @Override + public boolean isReverseIndexedField(String fieldName) { + return getOrEvaluate(AttributeType.REVERSE_INDEXED_FIELD, fieldName, underlyingHelper::isReverseIndexedField); + } + + @Override + public boolean isTokenizedField(String fieldName) { + return getOrEvaluate(AttributeType.TOKENIZED_FIELD, fieldName, underlyingHelper::isTokenizedField); + } + + @Override + public boolean isReverseTokenizedField(String fieldName) { + return getOrEvaluate(AttributeType.REVERSE_TOKENIZED_FIELD, fieldName, underlyingHelper::isReverseTokenizedField); + } + + @VisibleForTesting + boolean getOrEvaluate(AttributeType attributeType, String fieldName, Function evaluateFn) { + return resultCache.computeIfAbsent(fieldName, ResultEntry::new).resolveResult(attributeType, evaluateFn); + } + + private static class ResultEntry { + private final String fieldName; + private final EnumMap resultMap; + + ResultEntry(String fieldName) { + this.fieldName = fieldName; + this.resultMap = new EnumMap<>(AttributeType.class); + } + + boolean resolveResult(AttributeType attributeType, Function evaluateFn) { + return resultMap.computeIfAbsent(attributeType, (t) -> evaluateFn.apply(fieldName)); + } + } +} diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java index 4be57bdf85e..d6c42eba3a3 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java @@ -33,6 +33,7 @@ import datawave.ingest.config.IngestConfigurationFactory; import datawave.ingest.data.Type; import datawave.ingest.data.TypeRegistry; +import datawave.ingest.data.config.CachedFieldConfigHelper; import datawave.ingest.data.config.DataTypeHelperImpl; import datawave.ingest.data.config.FieldConfigHelper; import datawave.ingest.data.config.MarkingsHelper; @@ -137,9 +138,14 @@ public abstract class BaseIngestHelper extends AbstractIngestHelper implements C public static final String FIELD_FAILED_NORMALIZATION_POLICY = ".data.field.normalization.failure.policy"; public static final String FIELD_CONFIG_FILE = ".data.category.field.config.file"; + public static final String FIELD_CONFIG_CACHE_ENABLED = ".data.category.field.config.cache.enabled"; + public static final String FIELD_CONFIG_CACHE_KEY_LIMIT = ".data.category.field.config.cache.limit"; private static final Logger log = ThreadConfigurableLogger.getLogger(BaseIngestHelper.class); + private static final boolean DEFAULT_FIELD_CACHE_ENABLED = false; + private static final int DEFAULT_FIELD_CACHE_LIMIT = 100; + private Multimap> typeFieldMap = null; private Multimap> typePatternMap = null; private TreeMultimap> typeCompiledPatternMap = null; @@ -254,10 +260,17 @@ public void setup(Configuration config) { // Load the field helper, which takes precedence over the individual field configurations final String fieldConfigFile = config.get(this.getType().typeName() + FIELD_CONFIG_FILE); if (fieldConfigFile != null) { + final boolean fieldConfigCacheEnabled = config.getBoolean(this.getType().typeName() + FIELD_CONFIG_CACHE_ENABLED, DEFAULT_FIELD_CACHE_ENABLED); + final int fieldConfigCacheLimit = config.getInt(this.getType().typeName() + FIELD_CONFIG_CACHE_KEY_LIMIT, DEFAULT_FIELD_CACHE_LIMIT); if (log.isDebugEnabled()) { log.debug("Field config file " + fieldConfigFile + " specified for: " + this.getType().typeName() + FIELD_CONFIG_FILE); + log.debug("Field config cache enabled: " + fieldConfigCacheEnabled); + if (fieldConfigCacheEnabled) { + log.debug("Field config cache limit: " + fieldConfigCacheLimit); + } } - this.fieldConfigHelper = XMLFieldConfigHelper.load(fieldConfigFile, this); + final FieldConfigHelper baseHelper = XMLFieldConfigHelper.load(fieldConfigFile, this); + fieldConfigHelper = fieldConfigCacheEnabled ? new CachedFieldConfigHelper(baseHelper, fieldConfigCacheLimit) : baseHelper; } // Process the indexed fields diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java new file mode 100644 index 00000000000..48392823fbc --- /dev/null +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java @@ -0,0 +1,99 @@ +package datawave.ingest.data.config; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.stream.Stream; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class CachingFieldConfigHelperTest { + @SuppressWarnings("unchecked") + @Test + public void testCachingBehaviorWillCallBaseMethods() { + // @formatter:off + Stream.of(new Object[][] { + new Object[] { + (BiConsumer) FieldConfigHelper::isIndexOnlyField, + (BiConsumer) (h, f) -> verify(h).isIndexOnlyField(eq(f)), + (BiConsumer) FieldConfigHelper::isIndexedField, + (BiConsumer) (h, f) -> verify(h).isIndexedField(eq(f)), + (BiConsumer) FieldConfigHelper::isTokenizedField, + (BiConsumer) (h, f) -> verify(h).isTokenizedField(eq(f)), + (BiConsumer) FieldConfigHelper::isStoredField, + (BiConsumer) (h, f) -> verify(h).isStoredField(eq(f)), + (BiConsumer) FieldConfigHelper::isReverseIndexedField, + (BiConsumer) (h, f) -> verify(h).isReverseIndexedField(eq(f)), + (BiConsumer) FieldConfigHelper::isReverseTokenizedField, + (BiConsumer) (h, f) -> verify(h).isReverseTokenizedField(eq(f)), + } + }).forEach(arg -> { + // param[0] = helper method + // param[1] = validation method + String fieldName = "testField"; + BiConsumer testAction = (BiConsumer) arg[0]; + BiConsumer verifyAction = (BiConsumer) arg[1]; + FieldConfigHelper mockHelper = mock(FieldConfigHelper.class); + FieldConfigHelper cachedHelper = new CachedFieldConfigHelper(mockHelper, 1); + testAction.accept(cachedHelper, fieldName); + verifyAction.accept(mockHelper, fieldName); + }); + // @formatter:on + } + + @ParameterizedTest + @ValueSource(ints = {-1, 0}) + public void testConstructorWithNonPositiveLimitWillThrow(int limit) { + assertThrows(IllegalArgumentException.class, () -> new CachedFieldConfigHelper(mock(FieldConfigHelper.class), limit)); + } + + @Test + public void testCachingLimitsBetweenFieldsAndAttributeTypes() { + AtomicLong counter = new AtomicLong(); + CachedFieldConfigHelper helper = new CachedFieldConfigHelper(mock(FieldConfigHelper.class), 2); + Function fn = (f) -> { + counter.incrementAndGet(); + return true; + }; + + // following ensures that: + // 1. fields are computed, where appropriate per attribute-type + // 2. limit allows cache results to return + // 3. limit blocks results to return if exceeded + // 4. limit functions across attribute-types + + helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", fn); + Assertions.assertEquals(1, counter.get(), "field1 should compute result (new field)"); + + helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", fn); + Assertions.assertEquals(1, counter.get(), "field1 repeated (existing field)"); + + helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn); + Assertions.assertEquals(2, counter.get(), "field2 should compute result (new field)"); + + helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn); + Assertions.assertEquals(2, counter.get(), "field2 repeated (existing)"); + + helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.INDEXED_FIELD, "field1", fn); + Assertions.assertEquals(3, counter.get(), "field1 should compute result (new attribute)"); + + helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", fn); + Assertions.assertEquals(4, counter.get(), "field3 exceeded limit (new field)"); + + helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", fn); + Assertions.assertEquals(4, counter.get(), "field3 exceeded limit (existing field)"); + + // LRU map should evict field #2 + // we access field #1 above which has more accesses over field #2 + helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn); + Assertions.assertEquals(5, counter.get(), "field1 exceeded limit (new field/eviction)"); + } +} From 5f20b24e40f2445a6bb350de736f6ed377b22ce3 Mon Sep 17 00:00:00 2001 From: dtspence <33552925+dtspence@users.noreply.github.com> Date: Thu, 24 Oct 2024 14:47:52 +0000 Subject: [PATCH 2/6] Refactored unit test and fixed test bug --- .../config/CachingFieldConfigHelperTest.java | 52 ++++++++----------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java index 48392823fbc..f1d0deaa8fd 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java @@ -6,9 +6,7 @@ import static org.mockito.Mockito.verify; import java.util.concurrent.atomic.AtomicLong; -import java.util.function.BiConsumer; import java.util.function.Function; -import java.util.stream.Stream; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -16,37 +14,29 @@ import org.junit.jupiter.params.provider.ValueSource; public class CachingFieldConfigHelperTest { - @SuppressWarnings("unchecked") @Test public void testCachingBehaviorWillCallBaseMethods() { - // @formatter:off - Stream.of(new Object[][] { - new Object[] { - (BiConsumer) FieldConfigHelper::isIndexOnlyField, - (BiConsumer) (h, f) -> verify(h).isIndexOnlyField(eq(f)), - (BiConsumer) FieldConfigHelper::isIndexedField, - (BiConsumer) (h, f) -> verify(h).isIndexedField(eq(f)), - (BiConsumer) FieldConfigHelper::isTokenizedField, - (BiConsumer) (h, f) -> verify(h).isTokenizedField(eq(f)), - (BiConsumer) FieldConfigHelper::isStoredField, - (BiConsumer) (h, f) -> verify(h).isStoredField(eq(f)), - (BiConsumer) FieldConfigHelper::isReverseIndexedField, - (BiConsumer) (h, f) -> verify(h).isReverseIndexedField(eq(f)), - (BiConsumer) FieldConfigHelper::isReverseTokenizedField, - (BiConsumer) (h, f) -> verify(h).isReverseTokenizedField(eq(f)), - } - }).forEach(arg -> { - // param[0] = helper method - // param[1] = validation method - String fieldName = "testField"; - BiConsumer testAction = (BiConsumer) arg[0]; - BiConsumer verifyAction = (BiConsumer) arg[1]; - FieldConfigHelper mockHelper = mock(FieldConfigHelper.class); - FieldConfigHelper cachedHelper = new CachedFieldConfigHelper(mockHelper, 1); - testAction.accept(cachedHelper, fieldName); - verifyAction.accept(mockHelper, fieldName); - }); - // @formatter:on + String fieldName = "test"; + FieldConfigHelper mockHelper = mock(FieldConfigHelper.class); + FieldConfigHelper cachedHelper = new CachedFieldConfigHelper(mockHelper, 1); + + cachedHelper.isIndexOnlyField(fieldName); + verify(mockHelper).isIndexOnlyField(eq(fieldName)); + + cachedHelper.isIndexedField(fieldName); + verify(mockHelper).isIndexedField(eq(fieldName)); + + cachedHelper.isTokenizedField(fieldName); + verify(mockHelper).isTokenizedField(eq(fieldName)); + + cachedHelper.isStoredField(fieldName); + verify(mockHelper).isStoredField(eq(fieldName)); + + cachedHelper.isReverseIndexedField(fieldName); + verify(mockHelper).isReverseIndexedField(eq(fieldName)); + + cachedHelper.isReverseTokenizedField(fieldName); + verify(mockHelper).isReverseTokenizedField(eq(fieldName)); } @ParameterizedTest From 47d1cf310d8c4bfa1bf92180b2e0e333e97cd79e Mon Sep 17 00:00:00 2001 From: dtspence <33552925+dtspence@users.noreply.github.com> Date: Thu, 24 Oct 2024 18:17:30 +0000 Subject: [PATCH 3/6] Refactored implementation to use explicit switch and unit test update --- .../data/config/CachedFieldConfigHelper.java | 83 ++++++++++++++----- .../config/CachingFieldConfigHelperTest.java | 66 ++++++++++----- 2 files changed, 107 insertions(+), 42 deletions(-) diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java index 48f3c4b7e38..ec3e602e717 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java @@ -1,8 +1,7 @@ package datawave.ingest.data.config; -import java.util.EnumMap; import java.util.Map; -import java.util.function.Function; +import java.util.function.Predicate; import org.apache.commons.collections4.map.LRUMap; @@ -10,10 +9,10 @@ public class CachedFieldConfigHelper implements FieldConfigHelper { private final FieldConfigHelper underlyingHelper; - private final Map resultCache; + private final Map resultCache; enum AttributeType { - INDEXED_FIELD, REVERSE_INDEXED_FIELD, TOKENIZED_FIELD, REVERSE_TOKENIZED_FIELD, STORED_FIELD, INDEXED_ONLY + INDEXED_FIELD, REVERSE_INDEXED_FIELD, TOKENIZED_FIELD, REVERSE_TOKENIZED_FIELD, STORED_FIELD, INDEX_ONLY_FIELD } public CachedFieldConfigHelper(FieldConfigHelper helper, int limit) { @@ -26,50 +25,96 @@ public CachedFieldConfigHelper(FieldConfigHelper helper, int limit) { @Override public boolean isStoredField(String fieldName) { - return getOrEvaluate(AttributeType.STORED_FIELD, fieldName, underlyingHelper::isStoredField); + return getFieldResult(AttributeType.STORED_FIELD, fieldName, underlyingHelper::isStoredField); } @Override public boolean isIndexedField(String fieldName) { - return getOrEvaluate(AttributeType.INDEXED_FIELD, fieldName, underlyingHelper::isIndexedField); + return getFieldResult(AttributeType.INDEXED_FIELD, fieldName, underlyingHelper::isIndexedField); } @Override public boolean isIndexOnlyField(String fieldName) { - return getOrEvaluate(AttributeType.INDEXED_ONLY, fieldName, underlyingHelper::isIndexOnlyField); + return getFieldResult(AttributeType.INDEX_ONLY_FIELD, fieldName, underlyingHelper::isIndexOnlyField); } @Override public boolean isReverseIndexedField(String fieldName) { - return getOrEvaluate(AttributeType.REVERSE_INDEXED_FIELD, fieldName, underlyingHelper::isReverseIndexedField); + return getFieldResult(AttributeType.REVERSE_INDEXED_FIELD, fieldName, underlyingHelper::isReverseIndexedField); } @Override public boolean isTokenizedField(String fieldName) { - return getOrEvaluate(AttributeType.TOKENIZED_FIELD, fieldName, underlyingHelper::isTokenizedField); + return getFieldResult(AttributeType.TOKENIZED_FIELD, fieldName, underlyingHelper::isTokenizedField); } @Override public boolean isReverseTokenizedField(String fieldName) { - return getOrEvaluate(AttributeType.REVERSE_TOKENIZED_FIELD, fieldName, underlyingHelper::isReverseTokenizedField); + return getFieldResult(AttributeType.REVERSE_TOKENIZED_FIELD, fieldName, underlyingHelper::isReverseTokenizedField); } @VisibleForTesting - boolean getOrEvaluate(AttributeType attributeType, String fieldName, Function evaluateFn) { - return resultCache.computeIfAbsent(fieldName, ResultEntry::new).resolveResult(attributeType, evaluateFn); + boolean getFieldResult(AttributeType attributeType, String fieldName, Predicate fn) { + return resultCache.computeIfAbsent(fieldName, CachedEntry::new).get(attributeType).getResultOrEvaluate(fn); } - private static class ResultEntry { + private static class CachedEntry { private final String fieldName; - private final EnumMap resultMap; - - ResultEntry(String fieldName) { + private final MemoizedResult indexed; + private final MemoizedResult reverseIndexed; + private final MemoizedResult stored; + private final MemoizedResult indexedOnly; + private final MemoizedResult tokenized; + private final MemoizedResult reverseTokenized; + + private CachedEntry(String fieldName) { this.fieldName = fieldName; - this.resultMap = new EnumMap<>(AttributeType.class); + this.indexed = new MemoizedResult(); + this.reverseIndexed = new MemoizedResult(); + this.stored = new MemoizedResult(); + this.indexedOnly = new MemoizedResult(); + this.tokenized = new MemoizedResult(); + this.reverseTokenized = new MemoizedResult(); + } + + private MemoizedResult get(AttributeType attributeType) { + MemoizedResult result; + switch (attributeType) { + case INDEX_ONLY_FIELD: + result = indexedOnly; + break; + case INDEXED_FIELD: + result = indexed; + break; + case REVERSE_INDEXED_FIELD: + result = reverseIndexed; + break; + case TOKENIZED_FIELD: + result = tokenized; + break; + case REVERSE_TOKENIZED_FIELD: + result = reverseTokenized; + break; + case STORED_FIELD: + result = stored; + break; + default: + throw new IllegalArgumentException("Undefined attribute type: " + attributeType); + } + return result; } - boolean resolveResult(AttributeType attributeType, Function evaluateFn) { - return resultMap.computeIfAbsent(attributeType, (t) -> evaluateFn.apply(fieldName)); + private class MemoizedResult { + private boolean resultEvaluated; + private boolean result; + + private boolean getResultOrEvaluate(Predicate evaluateFn) { + if (!resultEvaluated) { + result = evaluateFn.test(fieldName); + resultEvaluated = true; + } + return result; + } } } } diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java index f1d0deaa8fd..b973dea7b24 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java @@ -1,16 +1,18 @@ package datawave.ingest.data.config; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Function; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.ValueSource; public class CachingFieldConfigHelperTest { @@ -45,14 +47,32 @@ public void testConstructorWithNonPositiveLimitWillThrow(int limit) { assertThrows(IllegalArgumentException.class, () -> new CachedFieldConfigHelper(mock(FieldConfigHelper.class), limit)); } + @SuppressWarnings("ClassEscapesDefinedScope") + @ParameterizedTest + @EnumSource(CachedFieldConfigHelper.AttributeType.class) + public void testAttributeTypesDoNotThrow(CachedFieldConfigHelper.AttributeType attributeType) { + String fieldName = "test"; + FieldConfigHelper mockHelper = mock(FieldConfigHelper.class); + CachedFieldConfigHelper cachedHelper = new CachedFieldConfigHelper(mockHelper, 1); + cachedHelper.getFieldResult(attributeType, fieldName, (f) -> true); + } + @Test public void testCachingLimitsBetweenFieldsAndAttributeTypes() { - AtomicLong counter = new AtomicLong(); - CachedFieldConfigHelper helper = new CachedFieldConfigHelper(mock(FieldConfigHelper.class), 2); - Function fn = (f) -> { - counter.incrementAndGet(); + AtomicLong storedCounter = new AtomicLong(); + AtomicLong indexCounter = new AtomicLong(); + FieldConfigHelper innerHelper = mock(FieldConfigHelper.class); + CachedFieldConfigHelper helper = new CachedFieldConfigHelper(innerHelper, 2); + + when(innerHelper.isStoredField(any())).then((a) -> { + storedCounter.incrementAndGet(); + return true; + }); + + when(innerHelper.isIndexedField(any())).then((a) -> { + indexCounter.incrementAndGet(); return true; - }; + }); // following ensures that: // 1. fields are computed, where appropriate per attribute-type @@ -60,30 +80,30 @@ public void testCachingLimitsBetweenFieldsAndAttributeTypes() { // 3. limit blocks results to return if exceeded // 4. limit functions across attribute-types - helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", fn); - Assertions.assertEquals(1, counter.get(), "field1 should compute result (new field)"); + helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", innerHelper::isStoredField); + assertEquals(1, storedCounter.get(), "field1 should compute result (new field)"); - helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", fn); - Assertions.assertEquals(1, counter.get(), "field1 repeated (existing field)"); + helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", innerHelper::isStoredField); + assertEquals(1, storedCounter.get(), "field1 repeated (existing field)"); - helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn); - Assertions.assertEquals(2, counter.get(), "field2 should compute result (new field)"); + helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", innerHelper::isStoredField); + assertEquals(2, storedCounter.get(), "field2 should compute result (new field)"); - helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn); - Assertions.assertEquals(2, counter.get(), "field2 repeated (existing)"); + helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", innerHelper::isStoredField); + assertEquals(2, storedCounter.get(), "field2 repeated (existing)"); - helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.INDEXED_FIELD, "field1", fn); - Assertions.assertEquals(3, counter.get(), "field1 should compute result (new attribute)"); + helper.getFieldResult(CachedFieldConfigHelper.AttributeType.INDEXED_FIELD, "field1", innerHelper::isIndexedField); + assertEquals(1, indexCounter.get(), "field1 should compute result (new attribute)"); - helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", fn); - Assertions.assertEquals(4, counter.get(), "field3 exceeded limit (new field)"); + helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", innerHelper::isStoredField); + assertEquals(3, storedCounter.get(), "field3 exceeded limit (new field)"); - helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", fn); - Assertions.assertEquals(4, counter.get(), "field3 exceeded limit (existing field)"); + helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field3", innerHelper::isStoredField); + assertEquals(3, storedCounter.get(), "field3 exceeded limit (existing field)"); // LRU map should evict field #2 // we access field #1 above which has more accesses over field #2 - helper.getOrEvaluate(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", fn); - Assertions.assertEquals(5, counter.get(), "field1 exceeded limit (new field/eviction)"); + helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", innerHelper::isStoredField); + assertEquals(4, storedCounter.get(), "field1 exceeded limit (new field/eviction)"); } } From 1a925e224677de2dcaabc3c5a576a85b423a35ae Mon Sep 17 00:00:00 2001 From: dtspence <33552925+dtspence@users.noreply.github.com> Date: Fri, 1 Nov 2024 21:48:58 +0000 Subject: [PATCH 4/6] Use LinkedHashMap as LRU implementation --- .../data/config/CachedFieldConfigHelper.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java index ec3e602e717..7f606cf20c5 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java @@ -1,13 +1,14 @@ package datawave.ingest.data.config; +import com.google.common.annotations.VisibleForTesting; + +import java.util.LinkedHashMap; import java.util.Map; import java.util.function.Predicate; -import org.apache.commons.collections4.map.LRUMap; - -import com.google.common.annotations.VisibleForTesting; - public class CachedFieldConfigHelper implements FieldConfigHelper { + private final static float DEFAULT_LRU_LF = 0.75f; + private final FieldConfigHelper underlyingHelper; private final Map resultCache; @@ -20,7 +21,7 @@ public CachedFieldConfigHelper(FieldConfigHelper helper, int limit) { throw new IllegalArgumentException("Limit must be a positive integer"); } this.underlyingHelper = helper; - this.resultCache = new LRUMap<>(limit); + this.resultCache = lruCache(limit); } @Override @@ -58,6 +59,16 @@ boolean getFieldResult(AttributeType attributeType, String fieldName, Predicate< return resultCache.computeIfAbsent(fieldName, CachedEntry::new).get(attributeType).getResultOrEvaluate(fn); } + static Map lruCache(final int maxSize) { + // Testing showed slightly better or same performance of LRU implementation below + // when compared to Apache Commons LRUMap + return new LinkedHashMap<>((int) (maxSize / DEFAULT_LRU_LF) + 1, DEFAULT_LRU_LF, true) { + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > maxSize; + } + }; + } + private static class CachedEntry { private final String fieldName; private final MemoizedResult indexed; From 7bdf955dcc2cdd4ecc03239e510443dd344328af Mon Sep 17 00:00:00 2001 From: dtspence <33552925+dtspence@users.noreply.github.com> Date: Mon, 4 Nov 2024 21:21:04 +0000 Subject: [PATCH 5/6] Rename unit test for main class name --- ...ldConfigHelperTest.java => CachedFieldConfigHelperTest.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename warehouse/ingest-core/src/test/java/datawave/ingest/data/config/{CachingFieldConfigHelperTest.java => CachedFieldConfigHelperTest.java} (99%) diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachedFieldConfigHelperTest.java similarity index 99% rename from warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java rename to warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachedFieldConfigHelperTest.java index b973dea7b24..18837a7c1c2 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachingFieldConfigHelperTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachedFieldConfigHelperTest.java @@ -15,7 +15,7 @@ import org.junit.jupiter.params.provider.EnumSource; import org.junit.jupiter.params.provider.ValueSource; -public class CachingFieldConfigHelperTest { +public class CachedFieldConfigHelperTest { @Test public void testCachingBehaviorWillCallBaseMethods() { String fieldName = "test"; From 4de29b003eb49df8233deddc5d81444549536498 Mon Sep 17 00:00:00 2001 From: dtspence <33552925+dtspence@users.noreply.github.com> Date: Thu, 7 Nov 2024 20:57:30 +0000 Subject: [PATCH 6/6] Logging to show if field limit has been exceeded for debugging --- .../data/config/CachedFieldConfigHelper.java | 31 +++++++++++++++++-- .../data/config/ingest/BaseIngestHelper.java | 16 +++++----- .../config/CachedFieldConfigHelperTest.java | 7 ++++- 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java index 7f606cf20c5..d72408ce789 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/CachedFieldConfigHelper.java @@ -1,27 +1,49 @@ package datawave.ingest.data.config; import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.LinkedHashMap; import java.util.Map; +import java.util.function.Function; import java.util.function.Predicate; public class CachedFieldConfigHelper implements FieldConfigHelper { + private final static Logger log = LoggerFactory.getLogger(CachedFieldConfigHelper.class); + private final static float DEFAULT_LRU_LF = 0.75f; + private final static int EMIT_OVER_LIMIT_THRESHOLD = 100; private final FieldConfigHelper underlyingHelper; private final Map resultCache; + private final Function resultEntryFn; + + private long fieldComputes; + private boolean fieldLimitExceeded; enum AttributeType { INDEXED_FIELD, REVERSE_INDEXED_FIELD, TOKENIZED_FIELD, REVERSE_TOKENIZED_FIELD, STORED_FIELD, INDEX_ONLY_FIELD } public CachedFieldConfigHelper(FieldConfigHelper helper, int limit) { + this(helper, limit, false); + } + + public CachedFieldConfigHelper(FieldConfigHelper helper, int limit, boolean debugLimitExceeded) { if (limit < 1) { throw new IllegalArgumentException("Limit must be a positive integer"); } this.underlyingHelper = helper; this.resultCache = lruCache(limit); + this.resultEntryFn = !debugLimitExceeded ? CachedEntry::new : (String f) -> { + fieldComputes++; + if (fieldComputes >= limit && ((fieldComputes == limit) || (fieldComputes % EMIT_OVER_LIMIT_THRESHOLD) == 0)) { + fieldLimitExceeded = true; + log.info("Field cache limit exceeded [val: {}, size={}, limit={}]", f, fieldComputes, limit); + } + return new CachedEntry(f); + }; } @Override @@ -56,10 +78,15 @@ public boolean isReverseTokenizedField(String fieldName) { @VisibleForTesting boolean getFieldResult(AttributeType attributeType, String fieldName, Predicate fn) { - return resultCache.computeIfAbsent(fieldName, CachedEntry::new).get(attributeType).getResultOrEvaluate(fn); + return resultCache.computeIfAbsent(fieldName, resultEntryFn).get(attributeType).getResultOrEvaluate(fn); + } + + @VisibleForTesting + boolean hasLimitExceeded() { + return fieldLimitExceeded; } - static Map lruCache(final int maxSize) { + private static Map lruCache(final int maxSize) { // Testing showed slightly better or same performance of LRU implementation below // when compared to Apache Commons LRUMap return new LinkedHashMap<>((int) (maxSize / DEFAULT_LRU_LF) + 1, DEFAULT_LRU_LF, true) { diff --git a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java index d6c42eba3a3..c90c478d0c2 100644 --- a/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java +++ b/warehouse/ingest-core/src/main/java/datawave/ingest/data/config/ingest/BaseIngestHelper.java @@ -140,12 +140,10 @@ public abstract class BaseIngestHelper extends AbstractIngestHelper implements C public static final String FIELD_CONFIG_FILE = ".data.category.field.config.file"; public static final String FIELD_CONFIG_CACHE_ENABLED = ".data.category.field.config.cache.enabled"; public static final String FIELD_CONFIG_CACHE_KEY_LIMIT = ".data.category.field.config.cache.limit"; + public static final String FIELD_CONFIG_CACHE_KEY_LIMIT_DEBUG = ".data.category.field.config.cache.limit.debug"; private static final Logger log = ThreadConfigurableLogger.getLogger(BaseIngestHelper.class); - private static final boolean DEFAULT_FIELD_CACHE_ENABLED = false; - private static final int DEFAULT_FIELD_CACHE_LIMIT = 100; - private Multimap> typeFieldMap = null; private Multimap> typePatternMap = null; private TreeMultimap> typeCompiledPatternMap = null; @@ -260,17 +258,21 @@ public void setup(Configuration config) { // Load the field helper, which takes precedence over the individual field configurations final String fieldConfigFile = config.get(this.getType().typeName() + FIELD_CONFIG_FILE); if (fieldConfigFile != null) { - final boolean fieldConfigCacheEnabled = config.getBoolean(this.getType().typeName() + FIELD_CONFIG_CACHE_ENABLED, DEFAULT_FIELD_CACHE_ENABLED); - final int fieldConfigCacheLimit = config.getInt(this.getType().typeName() + FIELD_CONFIG_CACHE_KEY_LIMIT, DEFAULT_FIELD_CACHE_LIMIT); + final boolean fieldConfigCacheEnabled = config.getBoolean(this.getType().typeName() + FIELD_CONFIG_CACHE_ENABLED, false); + final boolean fieldConfigCacheLimitDebug = config.getBoolean(this.getType().typeName() + FIELD_CONFIG_CACHE_KEY_LIMIT_DEBUG, false); + final int fieldConfigCacheLimit = config.getInt(this.getType().typeName() + FIELD_CONFIG_CACHE_KEY_LIMIT, 100); if (log.isDebugEnabled()) { log.debug("Field config file " + fieldConfigFile + " specified for: " + this.getType().typeName() + FIELD_CONFIG_FILE); log.debug("Field config cache enabled: " + fieldConfigCacheEnabled); if (fieldConfigCacheEnabled) { log.debug("Field config cache limit: " + fieldConfigCacheLimit); + log.debug("Field config cache limit debug: " + fieldConfigCacheLimitDebug); } } - final FieldConfigHelper baseHelper = XMLFieldConfigHelper.load(fieldConfigFile, this); - fieldConfigHelper = fieldConfigCacheEnabled ? new CachedFieldConfigHelper(baseHelper, fieldConfigCacheLimit) : baseHelper; + fieldConfigHelper = XMLFieldConfigHelper.load(fieldConfigFile, this); + if (fieldConfigCacheEnabled) { + fieldConfigHelper = new CachedFieldConfigHelper(fieldConfigHelper, fieldConfigCacheLimit, fieldConfigCacheLimitDebug); + } } // Process the indexed fields diff --git a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachedFieldConfigHelperTest.java b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachedFieldConfigHelperTest.java index 18837a7c1c2..3c878c350f7 100644 --- a/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachedFieldConfigHelperTest.java +++ b/warehouse/ingest-core/src/test/java/datawave/ingest/data/config/CachedFieldConfigHelperTest.java @@ -1,7 +1,9 @@ package datawave.ingest.data.config; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; @@ -62,7 +64,7 @@ public void testCachingLimitsBetweenFieldsAndAttributeTypes() { AtomicLong storedCounter = new AtomicLong(); AtomicLong indexCounter = new AtomicLong(); FieldConfigHelper innerHelper = mock(FieldConfigHelper.class); - CachedFieldConfigHelper helper = new CachedFieldConfigHelper(innerHelper, 2); + CachedFieldConfigHelper helper = new CachedFieldConfigHelper(innerHelper, 2, true); when(innerHelper.isStoredField(any())).then((a) -> { storedCounter.incrementAndGet(); @@ -82,12 +84,15 @@ public void testCachingLimitsBetweenFieldsAndAttributeTypes() { helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", innerHelper::isStoredField); assertEquals(1, storedCounter.get(), "field1 should compute result (new field)"); + assertFalse(helper.hasLimitExceeded()); helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field1", innerHelper::isStoredField); assertEquals(1, storedCounter.get(), "field1 repeated (existing field)"); + assertFalse(helper.hasLimitExceeded()); helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", innerHelper::isStoredField); assertEquals(2, storedCounter.get(), "field2 should compute result (new field)"); + assertTrue(helper.hasLimitExceeded()); helper.getFieldResult(CachedFieldConfigHelper.AttributeType.STORED_FIELD, "field2", innerHelper::isStoredField); assertEquals(2, storedCounter.get(), "field2 repeated (existing)");