Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid re-normalizing unfielded lookup results #2218

Open
wants to merge 21 commits into
base: integration
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4ece2c2
Initial changes to introduce caching of expanded fields
foster33 Jan 16, 2024
6b84111
Merge branch 'integration' into feature/issue-877
foster33 Jan 30, 2024
220fc25
Implement field caching in more locations
foster33 Jan 30, 2024
1d3d03e
Merge branch 'integration' into feature/issue-877
KhakiField Mar 19, 2024
cf3951f
Caching improvements
foster33 Mar 26, 2024
dcff2e9
revert code, fix caching, add test
foster33 Apr 4, 2024
b6b99ce
make the caching of previously expanded fields configurable
foster33 Apr 4, 2024
fac068f
Fix case where threshold exceeded values can be put into the cache
foster33 Apr 4, 2024
0ae0f5d
update shardqueryconfigurationtest
foster33 Apr 5, 2024
5ca6983
Fix mvn spotbug check bug & remove unnecessary code
foster33 Apr 8, 2024
308635a
change cache default value & move config cache check
foster33 Apr 8, 2024
f642c01
Revert ShardIndexQueryTableStaticMethods & FieldNameIndexLookup
foster33 Apr 8, 2024
843dd11
Fix testExceededTermThreshold failure
foster33 Apr 9, 2024
98c901f
Merge branch 'integration' into feature/issue-877
foster33 Apr 17, 2024
7027b15
Merge branch 'integration' into feature/issue-877
foster33 Apr 26, 2024
4754425
Merge branch 'integration' into feature/issue-877
foster33 May 6, 2024
e982e1d
Merge integration & resolve conflicts
foster33 Aug 2, 2024
742a44a
Fix formatting
foster33 Aug 2, 2024
b35f061
Merge branch 'integration' into feature/issue-877
foster33 Aug 20, 2024
70b2547
Merge integration and resolve conflicts
foster33 Sep 13, 2024
b63a528
Merge integration & resolve conflicts
foster33 Nov 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ public class ShardQueryConfiguration extends GenericQueryConfiguration implement
private boolean pruneQueryByIngestTypes = false;
// should this query reduce the set of fields prior to serialization
private boolean reduceQueryFields = false;
// should previously expanded fields be cached to prevent the re-normalizing of actual values
private boolean cachePreviouslyExpandedFields = false;
foster33 marked this conversation as resolved.
Show resolved Hide resolved
private boolean reduceQueryFieldsPerShard = false;
private boolean reduceTypeMetadata = false;
private boolean reduceTypeMetadataPerShard = false;
Expand Down Expand Up @@ -507,6 +509,7 @@ public ShardQueryConfiguration(ShardQueryConfiguration other) {
this.setReduceTypeMetadata(other.getReduceTypeMetadata());
this.setReduceTypeMetadataPerShard(other.getReduceTypeMetadataPerShard());
this.setParseTldUids(other.getParseTldUids());
this.setCachePreviouslyExpandedFields(other.isCachePreviouslyExpandedFields());
this.setSequentialScheduler(other.getSequentialScheduler());
this.setCollectTimingDetails(other.getCollectTimingDetails());
this.setLogTimingDetails(other.getLogTimingDetails());
Expand Down Expand Up @@ -2622,6 +2625,14 @@ public void setReduceIngestTypesPerShard(boolean reduceIngestTypesPerShard) {
this.reduceIngestTypesPerShard = reduceIngestTypesPerShard;
}

public boolean isCachePreviouslyExpandedFields() {
return cachePreviouslyExpandedFields;
}

public void setCachePreviouslyExpandedFields(boolean cachePreviouslyExpandedFields) {
this.cachePreviouslyExpandedFields = cachePreviouslyExpandedFields;
}

public boolean getUseTermCounts() {
return useTermCounts;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package datawave.query.jexl.lookups;

import java.util.Map;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

public class ExpandedFieldCache {
foster33 marked this conversation as resolved.
Show resolved Hide resolved
private Multimap<String,ValueSet> previouslyExpandedFieldCache = HashMultimap.create();

private boolean containsExpansionsFor(IndexLookupMap fieldstoterms) {
for (Map.Entry<String,ValueSet> fieldTermPair : fieldstoterms.entrySet()) {
if (previouslyExpandedFieldCache.containsEntry(fieldTermPair.getKey(), fieldTermPair.getValue())) {
return true;
}
}
return false;
}

public boolean containsExpansionsFor(String fieldName, String literal) {
if (previouslyExpandedFieldCache.containsKey(fieldName)) {
for (ValueSet value : previouslyExpandedFieldCache.get(fieldName)) {
if (value.contains(literal)) {
return true;
}
}
}
return false;
}

public void addExpansion(IndexLookupMap fieldstoterms) {
if (!containsExpansionsFor(fieldstoterms)) {
addExpansionToCache(fieldstoterms);
}
}

private void addExpansionToCache(IndexLookupMap fieldstoterms) {
if (!fieldstoterms.isKeyThresholdExceeded()) {
for (Map.Entry<String,ValueSet> fieldTermPair : fieldstoterms.entrySet()) {
if (!fieldTermPair.getValue().isThresholdExceeded()) {
previouslyExpandedFieldCache.put(fieldTermPair.getKey(), fieldTermPair.getValue());
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ public void submit() {
try {
if (!fields.isEmpty()) {
for (String term : terms) {

foster33 marked this conversation as resolved.
Show resolved Hide resolved
Set<Range> ranges = Collections.singleton(ShardIndexQueryTableStaticMethods.getLiteralRange(term));
if (config.getLimitAnyFieldLookups()) {
log.trace("Creating configureTermMatchOnly");
Expand All @@ -95,13 +94,10 @@ public void submit() {
for (String field : fields) {
bs.getOptions().fetchColumnFamily(new Text(field));
}

sessions.add(bs);

iter = Iterators.concat(iter, bs);
}
}

timedScanFuture = execService.submit(createTimedCallable(iter));
} catch (TableNotFoundException e) {
log.error(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ public static IndexLookup normalizeQueryTerm(JexlNode node, ShardQueryConfigurat
public static IndexLookup normalizeQueryTerm(String literal, ShardQueryConfiguration config, ScannerFactory scannerFactory, Set<String> expansionFields,
Set<Type<?>> dataTypes, MetadataHelper helperRef, ExecutorService execService) throws TableNotFoundException {
Set<String> terms = Sets.newHashSet(literal);

foster33 marked this conversation as resolved.
Show resolved Hide resolved
for (Type<?> normalizer : dataTypes) {
try {
String normalizedValue = normalizer.normalize(literal);
Expand All @@ -126,7 +125,6 @@ public static IndexLookup normalizeQueryTerm(String literal, ShardQueryConfigura
}
}
}

return new FieldNameIndexLookup(config, scannerFactory, getIndexedExpansionFields(expansionFields, false, config.getDatatypeFilter(), helperRef), terms,
execService);
}
Expand Down Expand Up @@ -213,7 +211,6 @@ public static IndexLookup normalizeQueryTerm(ASTNENode node, ShardQueryConfigura
protected static IndexLookup _normalizeQueryTerm(JexlNode node, ShardQueryConfiguration config, ScannerFactory scannerFactory, Set<String> expansionFields,
Set<Type<?>> dataTypes, MetadataHelper helperRef, ExecutorService execService) throws TableNotFoundException {
Object literal = JexlASTHelper.getLiteralValue(node);

if (literal instanceof String) {
return normalizeQueryTerm((String) literal, config, scannerFactory, expansionFields, dataTypes, helperRef, execService);
} else if (literal instanceof Number) {
Expand Down Expand Up @@ -383,7 +380,6 @@ public static IndexLookup expandRegexTerms(ASTERNode node, ShardQueryConfigurati
log.warn("Encountered literal that was not a String nor a Number: " + literal.getClass().getName() + ", " + literal);
}
}

return new RegexIndexLookup(config, scannerFactory, fieldName, patterns, helperRef, execService);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import datawave.query.config.ShardQueryConfiguration;
import datawave.query.exceptions.DatawaveFatalQueryException;
import datawave.query.jexl.lookups.AsyncIndexLookup;
import datawave.query.jexl.lookups.ExpandedFieldCache;
import datawave.query.jexl.lookups.IndexLookup;
import datawave.query.planner.pushdown.CostEstimator;
import datawave.query.tables.ScannerFactory;
Expand Down Expand Up @@ -49,21 +50,25 @@ public abstract class BaseIndexExpansionVisitor extends RebuildingVisitor {
protected Map<String,IndexLookup> lookupMap;
protected List<FutureJexlNode> futureJexlNodes;

protected BaseIndexExpansionVisitor(ShardQueryConfiguration config, ScannerFactory scannerFactory, MetadataHelper helper, String threadName)
throws TableNotFoundException {
this(config, scannerFactory, helper, null, threadName);
protected ExpandedFieldCache previouslyExpandedFieldCache;

protected BaseIndexExpansionVisitor(ShardQueryConfiguration config, ScannerFactory scannerFactory, MetadataHelper helper, String threadName,
ExpandedFieldCache previouslyExpandedFieldCache) throws TableNotFoundException {
this(config, scannerFactory, helper, null, threadName, previouslyExpandedFieldCache);
}

// The constructor should not be made public so that we can ensure that the executor is set up and shutdown correctly
protected BaseIndexExpansionVisitor(ShardQueryConfiguration config, ScannerFactory scannerFactory, MetadataHelper helper, Map<String,IndexLookup> lookupMap,
String threadName) throws TableNotFoundException {
String threadName, ExpandedFieldCache previouslyExpandedFieldCache) throws TableNotFoundException {
this.config = config;
this.scannerFactory = scannerFactory;
this.helper = helper;
this.expandFields = config.isExpandFields();
this.expandValues = config.isExpandValues();
this.threadName = threadName;

this.previouslyExpandedFieldCache = previouslyExpandedFieldCache;

this.indexOnlyFields = helper.getIndexOnlyFields(config.getDatatypeFilter());
this.allFields = helper.getAllFields(config.getDatatypeFilter());

Expand Down Expand Up @@ -111,7 +116,6 @@ protected <T extends JexlNode> T expand(T script) {
if (rebuiltScript instanceof FutureJexlNode) {
rebuiltScript = (T) ((FutureJexlNode) rebuiltScript).getRebuiltNode();
}

return rebuiltScript;
} finally {
shutdownExecutor();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import datawave.query.jexl.JexlASTHelper;
import datawave.query.jexl.JexlNodeFactory;
import datawave.query.jexl.LiteralRange;
import datawave.query.jexl.lookups.ExpandedFieldCache;
import datawave.query.jexl.lookups.IndexLookup;
import datawave.query.jexl.lookups.IndexLookupMap;
import datawave.query.jexl.lookups.ShardIndexQueryTableStaticMethods;
Expand All @@ -35,9 +36,9 @@ public class BoundedRangeIndexExpansionVisitor extends BaseIndexExpansionVisitor
private final JexlASTHelper.RangeFinder rangeFinder;

// The constructor should not be made public so that we can ensure that the executor is setup and shutdown correctly
protected BoundedRangeIndexExpansionVisitor(ShardQueryConfiguration config, ScannerFactory scannerFactory, MetadataHelper helper)
throws TableNotFoundException {
super(config, scannerFactory, helper, "BoundedRangeIndexExpansion");
protected BoundedRangeIndexExpansionVisitor(ShardQueryConfiguration config, ScannerFactory scannerFactory, MetadataHelper helper,
ExpandedFieldCache prevExpandedFieldCache) throws TableNotFoundException {
super(config, scannerFactory, helper, "BoundedRangeIndexExpansion", prevExpandedFieldCache);

rangeFinder = JexlASTHelper.findRange().indexedOnly(this.config.getDatatypeFilter(), this.helper).notDelayed();
}
Expand All @@ -59,11 +60,11 @@ protected BoundedRangeIndexExpansionVisitor(ShardQueryConfiguration config, Scan
* @throws TableNotFoundException
* if we fail to retrieve fields from the metadata helper
*/
public static <T extends JexlNode> T expandBoundedRanges(ShardQueryConfiguration config, ScannerFactory scannerFactory, MetadataHelper helper, T script)
throws TableNotFoundException {
public static <T extends JexlNode> T expandBoundedRanges(ShardQueryConfiguration config, ScannerFactory scannerFactory, MetadataHelper helper, T script,
ExpandedFieldCache previouslyExpandedFieldCache) throws TableNotFoundException {
// if not expanding fields or values, then this is a noop
if (config.isExpandFields() || config.isExpandValues()) {
BoundedRangeIndexExpansionVisitor visitor = new BoundedRangeIndexExpansionVisitor(config, scannerFactory, helper);
BoundedRangeIndexExpansionVisitor visitor = new BoundedRangeIndexExpansionVisitor(config, scannerFactory, helper, previouslyExpandedFieldCache);
return visitor.expand(script);
} else {
return script;
Expand Down Expand Up @@ -103,7 +104,9 @@ protected IndexLookup createLookup(LiteralRange<?> range) {
protected void rebuildFutureJexlNode(FutureJexlNode futureJexlNode) {
JexlNode currentNode = futureJexlNode.getOrigNode();
IndexLookupMap fieldsToTerms = futureJexlNode.getLookup().lookup();

if (config.isCachePreviouslyExpandedFields()) {
previouslyExpandedFieldCache.addExpansion(fieldsToTerms);
}
futureJexlNode.setRebuiltNode(JexlNodeFactory.createNodeTreeFromFieldsToValues(JexlNodeFactory.ContainerType.OR_NODE, false, currentNode, fieldsToTerms,
expandFields, expandValues, futureJexlNode.isKeepOriginalNode()));
}
Expand Down
Loading
Loading