diff --git a/core/query/src/main/java/datawave/core/query/logic/QueryLogic.java b/core/query/src/main/java/datawave/core/query/logic/QueryLogic.java index a85ddfb3f29..a0b6fee3999 100644 --- a/core/query/src/main/java/datawave/core/query/logic/QueryLogic.java +++ b/core/query/src/main/java/datawave/core/query/logic/QueryLogic.java @@ -7,8 +7,11 @@ import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.security.Authorizations; +import org.apache.commons.collections4.Transformer; import org.apache.commons.collections4.iterators.TransformIterator; +import com.google.common.collect.HashMultimap; + import datawave.audit.SelectorExtractor; import datawave.core.common.connection.AccumuloConnectionFactory; import datawave.core.query.cache.ResultsPage; @@ -24,6 +27,7 @@ import datawave.webservice.query.exception.QueryException; import datawave.webservice.query.result.event.ResponseObjectFactory; import datawave.webservice.result.BaseResponse; +import datawave.webservice.result.QueryValidationResponse; public interface QueryLogic extends Iterable, Cloneable, ParameterValidator { @@ -481,4 +485,31 @@ default void preInitialize(Query settings, Set userAuthorization void setServerUser(ProxiedUserDetails serverUser); + /** + * Validates the given query according to the validation criteria established for the query logic. + * + * @param client + * the Accumulo connector to use for this query + * @param query + * the query settings (query, begin date, end date, etc.) + * @param auths + * the authorizations that have been calculated for this query based on the caller and server. + * @param expandFields + * @param expandValues + * @return a list of messages detailing any issues found for the query + */ + default Object validateQuery(AccumuloClient client, Query query, Set auths, boolean expandFields, boolean expandValues) throws Exception { + throw new UnsupportedOperationException("Query validation not implemented"); + } + + /** + * Return a transformer that will convert the result of {@link QueryLogic#validateQuery(AccumuloClient, Query, Set, boolean, boolean)} to a + * {@link QueryValidationResponse}. 
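// ---- Editor's note: illustrative sketch, not part of this patch. ----
// A caller holding a QueryLogic that overrides the two new default methods above might validate a
// query and build the response like this (the variable names, and Set<Authorizations> as the element
// type of the auths argument, are assumptions):
    Object result = logic.validateQuery(client, settings, auths, true, true);
    Transformer transformer = logic.getQueryValidationResponseTransformer();
    QueryValidationResponse response = (QueryValidationResponse) transformer.transform(result);
// ---- end of editor's note. ----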
+ * + * @return the transformer + */ + default Transformer getQueryValidationResponseTransformer() { + throw new UnsupportedOperationException("Query validation response transformer not implemented"); + } + } diff --git a/pom.xml b/pom.xml index 56b8c966e42..7735ce19da1 100644 --- a/pom.xml +++ b/pom.xml @@ -1584,7 +1584,7 @@ org.junit.jupiter - junit-jupiter-engine + junit-jupiter ${version.junit.bom} @@ -1694,7 +1694,7 @@ org.junit.jupiter - junit-jupiter-engine + junit-jupiter ${version.junit.bom} diff --git a/warehouse/query-core/pom.xml b/warehouse/query-core/pom.xml index 9b484e98b60..b7bb655f7f7 100644 --- a/warehouse/query-core/pom.xml +++ b/warehouse/query-core/pom.xml @@ -395,6 +395,11 @@ junit-jupiter-api test + + org.junit.jupiter + junit-jupiter-params + test + org.mockito mockito-core diff --git a/warehouse/query-core/src/main/java/datawave/query/Constants.java b/warehouse/query-core/src/main/java/datawave/query/Constants.java index c1ab9ff4f56..1df2003fb7b 100644 --- a/warehouse/query-core/src/main/java/datawave/query/Constants.java +++ b/warehouse/query-core/src/main/java/datawave/query/Constants.java @@ -40,6 +40,8 @@ public class Constants { public static final String PIPE = "|"; + public static final String ASTERISK = "*"; + public static final Text TEXT_NULL = new Text(NULL); public static final Text FI_PREFIX = new Text("fi"); @@ -95,4 +97,11 @@ public class Constants { public static final String END_DATE = "end.date"; public static final String COLUMN_VISIBILITY = "columnVisibility"; + + public static final Character BACKSLASH_CHAR = '\\'; + public static final Character ASTERISK_CHAR = '*'; + + public static final String JEXL = "JEXL"; + public static final String LUCENE = "LUCENE"; + public static final String LUCENE_UUID = "LUCENE-UUID"; } diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctionsDescriptor.java index 73492a6a108..a5145b23dda 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/EvaluationPhaseFilterFunctionsDescriptor.java @@ -198,7 +198,8 @@ public Set fieldsForNormalization(MetadataHelper helper, Set dat public Set fields(MetadataHelper helper, Set datatypeFilter) { FunctionJexlNodeVisitor functionMetadata = new FunctionJexlNodeVisitor(); node.jjtAccept(functionMetadata, null); - Set fields = Sets.newHashSet(); + // Maintain insertion order. 
+ Set fields = Sets.newLinkedHashSet(); List arguments = functionMetadata.args(); if (MATCHCOUNTOF.equals(functionMetadata.name())) { diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java index 718cfa7ff50..d72445c8749 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/functions/QueryFunctionsDescriptor.java @@ -20,6 +20,7 @@ import org.apache.commons.jexl3.parser.JexlNodes; import org.apache.commons.jexl3.parser.ParserTreeConstants; +import datawave.query.Constants; import datawave.query.attributes.AttributeFactory; import datawave.query.attributes.UniqueFields; import datawave.query.config.ShardQueryConfiguration; @@ -29,9 +30,11 @@ import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor; import datawave.query.jexl.nodes.QueryPropertyMarker; import datawave.query.jexl.visitors.EventDataQueryExpressionVisitor; +import datawave.query.jexl.visitors.PrintingVisitor; import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; import datawave.query.util.DateIndexHelper; import datawave.query.util.MetadataHelper; +import datawave.util.StringUtils; public class QueryFunctionsDescriptor implements JexlFunctionArgumentDescriptorFactory { @@ -152,6 +155,15 @@ public Set fields(MetadataHelper helper, Set datatypeFilter) { case QueryFunctions.NO_EXPANSION: case QueryFunctions.LENIENT_FIELDS_FUNCTION: case QueryFunctions.STRICT_FIELDS_FUNCTION: + case QueryFunctions.EXCERPT_FIELDS_FUNCTION: + case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_YEAR_FUNCTION: + case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MONTH_FUNCTION: + case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_DAY_FUNCTION: + case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_HOUR_FUNCTION: + case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_TENTH_OF_HOUR_FUNCTION: + case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MINUTE_FUNCTION: + case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_SECOND_FUNCTION: + case QueryOptionsFromQueryVisitor.UniqueFunction.UNIQUE_BY_MILLISECOND_FUNCTION: // In practice each of these functions should be parsed from the query // almost immediately. 
This implementation is added for consistency for (JexlNode arg : args) { @@ -181,6 +193,12 @@ public Set fields(MetadataHelper helper, Set datatypeFilter) { } } break; + case QueryFunctions.RENAME_FUNCTION: + for (JexlNode arg : args) { + String value = JexlNodes.getIdentifierOrLiteralAsString(arg); + String[] parts = StringUtils.split(value, Constants.EQUALS); + fields.add(parts[0]); + } case QueryFunctions.MATCH_REGEX: case BETWEEN: case LENGTH: diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FetchFunctionFieldsVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FetchFunctionFieldsVisitor.java new file mode 100644 index 00000000000..ca10043cffb --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FetchFunctionFieldsVisitor.java @@ -0,0 +1,148 @@ +package datawave.query.jexl.visitors; + +import java.lang.reflect.Array; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Set; +import java.util.StringJoiner; + +import org.apache.commons.jexl3.parser.ASTFunctionNode; +import org.apache.commons.jexl3.parser.JexlNode; +import org.apache.commons.lang3.tuple.Pair; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.LinkedHashMultimap; + +import datawave.query.jexl.functions.FunctionJexlNodeVisitor; +import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory; +import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor; +import datawave.query.util.MetadataHelper; + +/** + * A visitor that fetches all fields from the specified functions. + */ +public class FetchFunctionFieldsVisitor extends ShortCircuitBaseVisitor { + + private final Set> functions; + private final MetadataHelper metadataHelper; + // Maintain insertion order. + private final LinkedHashMultimap,String> fields = LinkedHashMultimap.create(); + + /** + * Fetch the fields seen in the specified functions. + * + * @param query + * the query tree + * @param functions + * the set of {@code } pairs to filter on + * @param metadataHelper + * @return the set of fields found within the functions + */ + public static Set fetchFields(JexlNode query, Set> functions, MetadataHelper metadataHelper) { + if (query != null) { + FetchFunctionFieldsVisitor visitor = new FetchFunctionFieldsVisitor(functions, metadataHelper); + query.jjtAccept(visitor, functions); + return visitor.getFunctionFields(); + } else { + return Collections.emptySet(); + } + } + + private FetchFunctionFieldsVisitor(Set> functions, MetadataHelper metadataHelper) { + if (functions == null || functions.isEmpty()) { + this.functions = Collections.emptySet(); + } else { + this.functions = new HashSet<>(); + functions.forEach((p) -> this.functions.add(Pair.of(p.getLeft(), p.getRight()))); + } + this.metadataHelper = metadataHelper; + } + + @Override + public Object visit(ASTFunctionNode node, Object data) { + FunctionJexlNodeVisitor visitor = new FunctionJexlNodeVisitor(); + node.jjtAccept(visitor, null); + + Pair function = Pair.of(visitor.namespace(), visitor.name()); + // If we are either not filtering out functions, or the function filters contains the functions, fetch the fields. 
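// ---- Editor's note: illustrative usage sketch for FetchFunctionFieldsVisitor, not part of this
// patch. The query string, field names, and the metadataHelper variable are hypothetical, and
// JexlASTHelper.parseJexlQuery may throw a ParseException.
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("filter:includeRegex(FOO, 'bar.*') && BAZ == '1'");
    Set<Pair<String,String>> filter = Collections.singleton(Pair.of("filter", "includeRegex"));
    Set<FetchFunctionFieldsVisitor.FunctionFields> found = FetchFunctionFieldsVisitor.fetchFields(script, filter, metadataHelper);
// Each FunctionFields entry reports the namespace ("filter"), the function name ("includeRegex"),
// and the fields seen in that function's arguments (here, FOO).
// ---- end of editor's note. ----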
+ if (functions.isEmpty() || functions.contains(function)) { + JexlArgumentDescriptor desc = JexlFunctionArgumentDescriptorFactory.F.getArgumentDescriptor(node); + Set fields = desc.fields(metadataHelper, null); + // Add the fields to the function. + if (!fields.isEmpty()) { + this.fields.putAll(function, fields); + } + } + return null; + } + + // Returns the fields map as a set of FunctionFields. + private Set getFunctionFields() { + // Maintain insertion order. + Set functionFields = new LinkedHashSet<>(); + for (Pair function : fields.keySet()) { + functionFields.add(new FunctionFields(function.getLeft(), function.getRight(), fields.get(function))); + } + return functionFields; + } + + public static class FunctionFields { + private final String namespace; + private final String function; + private final Set fields; + + public static FunctionFields of(String namespace, String function, String... fields) { + return new FunctionFields(namespace, function, Arrays.asList(fields)); + } + + private FunctionFields(String namespace, String function) { + this(namespace, function, Collections.emptySet()); + } + + private FunctionFields(String namespace, String function, Collection fields) { + this.namespace = namespace; + this.function = function; + // Maintain insertion order. + this.fields = fields.isEmpty() ? Collections.emptySet() : Collections.unmodifiableSet(new LinkedHashSet<>(fields)); + } + + public String getNamespace() { + return namespace; + } + + public String getFunction() { + return function; + } + + public Set getFields() { + return fields; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + FunctionFields that = (FunctionFields) object; + return Objects.equals(namespace, that.namespace) && Objects.equals(function, that.function) && Objects.equals(fields, that.fields); + } + + @Override + public int hashCode() { + return Objects.hash(namespace, function, fields); + } + + @Override + public String toString() { + return new StringJoiner(", ", FunctionFields.class.getSimpleName() + "[", "]").add("namespace='" + namespace + "'") + .add("function='" + function + "'").add("fields=" + fields).toString(); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FieldMissingFromSchemaVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FieldMissingFromSchemaVisitor.java index 1448d711e24..10fc979ea12 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FieldMissingFromSchemaVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FieldMissingFromSchemaVisitor.java @@ -2,6 +2,7 @@ import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.NoSuchElementException; import java.util.Set; @@ -66,7 +67,8 @@ public FieldMissingFromSchemaVisitor(MetadataHelper helper, Set datatype @SuppressWarnings("unchecked") public static Set getNonExistentFields(MetadataHelper helper, ASTJexlScript script, Set datatypes, Set specialFields) { FieldMissingFromSchemaVisitor visitor = new FieldMissingFromSchemaVisitor(helper, datatypes, specialFields); - return (Set) script.jjtAccept(visitor, new HashSet<>()); + // Maintain insertion order. 
+ return (Set) script.jjtAccept(visitor, new LinkedHashSet<>()); } /** diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FieldsWithNumericValuesVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FieldsWithNumericValuesVisitor.java new file mode 100644 index 00000000000..90b7dd0b8f1 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/FieldsWithNumericValuesVisitor.java @@ -0,0 +1,91 @@ +package datawave.query.jexl.visitors; + +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.Set; + +import org.apache.commons.jexl3.parser.ASTEQNode; +import org.apache.commons.jexl3.parser.ASTFunctionNode; +import org.apache.commons.jexl3.parser.ASTGENode; +import org.apache.commons.jexl3.parser.ASTGTNode; +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.ASTLENode; +import org.apache.commons.jexl3.parser.ASTLTNode; +import org.apache.commons.jexl3.parser.ASTNENode; +import org.apache.commons.jexl3.parser.JexlNode; +import org.apache.commons.lang3.tuple.Pair; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.functions.FunctionJexlNodeVisitor; +import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory; +import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor; + +/** + * A visitor that fetches all fields found in the query that have a numeric value. + */ +public class FieldsWithNumericValuesVisitor extends ShortCircuitBaseVisitor { + + /** + * Fetch all fields that have a numeric value. + * + * @param query + * the query + * @return the set of fields + */ + public static Set getFields(ASTJexlScript query) { + if (query == null) { + return Collections.emptySet(); + } else { + FieldsWithNumericValuesVisitor visitor = new FieldsWithNumericValuesVisitor(); + // Maintain insertion order of fields found. + return (Set) query.jjtAccept(visitor, new LinkedHashSet()); + } + } + + @Override + public Object visit(ASTEQNode node, Object data) { + checkSingleField(node, data); + return data; + } + + @Override + public Object visit(ASTNENode node, Object data) { + checkSingleField(node, data); + return data; + } + + @Override + public Object visit(ASTLTNode node, Object data) { + checkSingleField(node, data); + return data; + } + + @Override + public Object visit(ASTGTNode node, Object data) { + checkSingleField(node, data); + return data; + } + + @Override + public Object visit(ASTLENode node, Object data) { + checkSingleField(node, data); + return data; + } + + @Override + public Object visit(ASTGENode node, Object data) { + checkSingleField(node, data); + return data; + } + + private void checkSingleField(JexlNode node, Object data) { + String field = JexlASTHelper.getIdentifier(node); + if (field != null) { + Object literal = JexlASTHelper.getLiteralValue(node); + if (literal instanceof Number) { + ((Set) data).add(field); + } + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java index a9b9bdb8de0..fe2be50213e 100644 --- a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryOptionsFromQueryVisitor.java @@ -49,6 +49,11 @@ *
  • {@code f:unique_by_second()}: Expects a comma-delimited list of fields to be unique with a granularity level of SECOND, e.g. * {@code unique_by_second('field1','field2')}
  • *
  • {@code f:rename}: Expects a comma-delimited list of field/field mappings, e.g. {@code f:rename('field1=field2','field3=field4')}
  • + *
  • {@code f:sum}: Expects a comma-delimited list of fields whose values will be summed within a grouping.
  • + *
  • {@code f:min}: Expects a comma-delimited list of fields for which the minimum value will be found within a grouping.
  • + *
  • {@code f:max}: Expects a comma-delimited list of fields for which the maximum value will be found within a grouping.
  • + *
  • {@code f:average}: Expects a comma-delimited list of fields for which the average value will be found within a grouping.
  • + *
  • {@code f:count}: Expects a comma-delimited list of fields whose occurrences will be counted within a grouping.
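// ---- Editor's note: illustrative example, not part of this patch. Following the pattern of the
// entries above, the new aggregate options would appear in a query roughly as shown below; the
// field names are hypothetical and the existing f:groupby option is assumed:
//     AGENT == 'smith' && f:groupby('AGE') && f:sum('HEIGHT') && f:average('WEIGHT') && f:count('AGE')
// ---- end of editor's note. ----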
  • * */ public class QueryOptionsFromQueryVisitor extends RebuildingVisitor { diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryPatternsVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryPatternsVisitor.java new file mode 100644 index 00000000000..b9ce35c8e4e --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/QueryPatternsVisitor.java @@ -0,0 +1,87 @@ +package datawave.query.jexl.visitors; + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.jexl3.parser.ASTERNode; +import org.apache.commons.jexl3.parser.ASTFunctionNode; +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.ASTNRNode; +import org.apache.commons.jexl3.parser.ASTStringLiteral; +import org.apache.commons.jexl3.parser.JexlNode; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.functions.FunctionJexlNodeVisitor; +import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory; +import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor; + +/** + * A visitor that will return the set of all unique patterns found in the query. + */ +public class QueryPatternsVisitor extends ShortCircuitBaseVisitor { + + /** + * Return the set of all patterns found in the query. + * + * @param query + * the query + * @return the patterns + */ + public static Set findPatterns(ASTJexlScript query) { + Set patterns = new HashSet<>(); + if (query == null) { + return patterns; + } else { + QueryPatternsVisitor visitor = new QueryPatternsVisitor(); + query.jjtAccept(visitor, patterns); + return patterns; + } + } + + @Override + public Object visit(ASTERNode node, Object data) { + addPattern(node, data); + return data; + } + + @Override + public Object visit(ASTNRNode node, Object data) { + addPattern(node, data); + return data; + } + + @Override + public Object visit(ASTFunctionNode node, Object data) { + JexlArgumentDescriptor descriptor = JexlFunctionArgumentDescriptorFactory.F.getArgumentDescriptor(node); + if (descriptor == null) { + throw new IllegalStateException("Could not get descriptor for ASTFunctionNode"); + } + + // If the function descriptor indicates the function has regex arguments, extract the arguments. + if (descriptor.regexArguments()) { + FunctionJexlNodeVisitor functionVisitor = new FunctionJexlNodeVisitor(); + functionVisitor.visit(node, null); + List args = functionVisitor.args(); + // Add each string literal argument as a regex pattern. + args.stream().filter(arg -> arg instanceof ASTStringLiteral).forEach(arg -> addPattern(arg, data)); + } + return data; + } + + private void addPattern(JexlNode node, Object data) { + // Catch the situation where a user might enter FIELD1 !~ VALUE1 + Object literalValue; + try { + literalValue = JexlASTHelper.getLiteralValue(node); + } catch (Exception e) { + // in this case there was no literal (e.g. 
FIELD1 !~ FIELD2) + return; + } + + if (literalValue != null && String.class.equals(literalValue.getClass())) { + ((Set) data).add((String) literalValue); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/UnescapedSpecialCharactersVisitor.java b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/UnescapedSpecialCharactersVisitor.java new file mode 100644 index 00000000000..9cc50eaab83 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/jexl/visitors/UnescapedSpecialCharactersVisitor.java @@ -0,0 +1,296 @@ +package datawave.query.jexl.visitors; + +import static datawave.query.Constants.BACKSLASH_CHAR; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import org.apache.commons.jexl3.parser.ASTEQNode; +import org.apache.commons.jexl3.parser.ASTERNode; +import org.apache.commons.jexl3.parser.ASTFunctionNode; +import org.apache.commons.jexl3.parser.ASTGENode; +import org.apache.commons.jexl3.parser.ASTGTNode; +import org.apache.commons.jexl3.parser.ASTLENode; +import org.apache.commons.jexl3.parser.ASTLTNode; +import org.apache.commons.jexl3.parser.ASTNENode; +import org.apache.commons.jexl3.parser.ASTNRNode; +import org.apache.commons.jexl3.parser.ASTStringLiteral; +import org.apache.commons.jexl3.parser.JexlNode; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.LinkedHashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; +import com.google.common.collect.SetMultimap; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.functions.FunctionJexlNodeVisitor; +import datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory; +import datawave.query.jexl.functions.arguments.JexlArgumentDescriptor; + +/** + * This visitor provides the ability to detect and return unescaped special characters found in string literals and regex patterns in the query. + */ +public class UnescapedSpecialCharactersVisitor extends ShortCircuitBaseVisitor { + + public static final Set patternReservedCharacters = Collections + .unmodifiableSet(Set.of('.', '+', '*', '?', '^', '$', '(', ')', '[', ']', '{', '}', '|', '\\')); + + private final Set literalExceptions; + private final boolean escapedWhitespaceRequiredForLiterals; + private final Set patternExceptions; + private final boolean escapedWhitespaceRequiredForPatterns; + private final Map> literalMap = new HashMap<>(); + private final Map> patternMap = new HashMap<>(); + + /** + * Returns a {@link UnescapedSpecialCharactersVisitor} that has traversed over the given queryTree and has searched for unescaped special characters in + * string literals and regex patterns, taking into account the given exceptions. Whitespace characters will not be flagged if they are not escaped. + * + * @param queryTree + * the query + * @param literalExceptions + * the characters that may be unescaped in string literals + * @param patternExceptions + * the characters that may be unescaped in regex patterns. This will always include the set of regex-reserved characters. 
+ * @return the visitor + */ + public static UnescapedSpecialCharactersVisitor check(JexlNode queryTree, Set literalExceptions, Set patternExceptions) { + return check(queryTree, literalExceptions, false, patternExceptions, false); + } + + /** + * Returns a {@link UnescapedSpecialCharactersVisitor} that has traversed over the given queryTree and has searched for unescaped special characters in + * string literals and regex patterns, taking into account the given exceptions and whitespace criteria. + * + * @param queryTree + * the query + * @param literalExceptions + * the characters that may be unescaped in string literals + * @param escapedWhitespaceRequiredForLiterals + * if true, whitespace characters must be escaped in string literals or they will be flagged + * @param patternExceptions + * the characters that may be unescaped in regex patterns. This will always include the set of regex-reserved characters. + * @param escapedWhitespaceRequiredForPatterns + * if true, whitespace characters must be escaped in regex patterns or they will be flagged + * @return the visitor + */ + public static UnescapedSpecialCharactersVisitor check(JexlNode node, Set literalExceptions, boolean escapedWhitespaceRequiredForLiterals, + Set patternExceptions, boolean escapedWhitespaceRequiredForPatterns) { + UnescapedSpecialCharactersVisitor visitor = new UnescapedSpecialCharactersVisitor(literalExceptions, escapedWhitespaceRequiredForLiterals, + patternExceptions, escapedWhitespaceRequiredForPatterns); + node.jjtAccept(visitor, null); + return visitor; + } + + private UnescapedSpecialCharactersVisitor(Set literalExceptions, boolean escapedWhitespaceRequiredForLiterals, Set patternExceptions, + boolean escapedWhitespaceRequiredForPatterns) { + this.literalExceptions = new HashSet<>(literalExceptions); + // Ensure the allowed pattern special characters always include the seto of pattern reserved characters. + this.patternExceptions = new HashSet<>(patternExceptions); + this.patternExceptions.addAll(patternReservedCharacters); + this.escapedWhitespaceRequiredForLiterals = escapedWhitespaceRequiredForLiterals; + this.escapedWhitespaceRequiredForPatterns = escapedWhitespaceRequiredForPatterns; + } + + /** + * Returns a map of string literals where unescaped characters were found to their unescaped characters. + * + * @return the multimap + */ + public SetMultimap getUnescapedCharactersInLiterals() { + return getMultimap(literalMap); + } + + /** + * Returns a map of pattern patterns unescaped characters were found to their unescaped characters. + * + * @return the multimap + */ + public SetMultimap getUnescapedCharactersInPatterns() { + return getMultimap(patternMap); + } + + // Returns a {@link SetMultimap} of all entries in the given map where the value set is not empty. + + /** + * Returns a {@link SetMultimap} of all entries in the given map where the value set is not empty. + * + * @param map + * the map + * @return the multimap + */ + private SetMultimap getMultimap(Map> map) { + // @formatter:off + Map> relevantEntries = map.entrySet().stream() + .filter(entry -> !entry.getValue().isEmpty()) + .collect(Collectors.toMap(entry -> entry.getKey(), entry -> entry.getValue())); + // @formatter:on + // Maintain insertion order. 
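// ---- Editor's note: illustrative usage sketch for UnescapedSpecialCharactersVisitor, not part of
// this patch. The query and the exception sets are hypothetical, and parseJexlQuery may throw a
// ParseException.
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("FOO == 'ab#cd' && BAR =~ 'ab.*#'");
    UnescapedSpecialCharactersVisitor visitor = UnescapedSpecialCharactersVisitor.check(script, Set.of('_'), Set.of('_'));
    SetMultimap<String,Character> inLiterals = visitor.getUnescapedCharactersInLiterals();   // expected: {"ab#cd" -> [#]}
    SetMultimap<String,Character> inPatterns = visitor.getUnescapedCharactersInPatterns();   // expected: {"ab.*#" -> [#]}
// The '#' is flagged in both places because it is not a letter, digit, exception character, or (for
// the pattern) one of the always-allowed regex-reserved characters such as '.' and '*'.
// ---- end of editor's note. ----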
+ SetMultimap multimap = LinkedHashMultimap.create(); + relevantEntries.entrySet().forEach(entry -> multimap.putAll(entry.getKey(), entry.getValue())); + return multimap; + } + + @Override + public Object visit(ASTEQNode node, Object data) { + checkLiteral(node); + return data; + } + + @Override + public Object visit(ASTNENode node, Object data) { + checkLiteral(node); + return data; + } + + @Override + public Object visit(ASTLTNode node, Object data) { + checkLiteral(node); + return data; + } + + @Override + public Object visit(ASTGTNode node, Object data) { + checkLiteral(node); + return data; + } + + @Override + public Object visit(ASTLENode node, Object data) { + checkLiteral(node); + return data; + } + + @Override + public Object visit(ASTGENode node, Object data) { + checkLiteral(node); + return data; + } + + @Override + public Object visit(ASTERNode node, Object data) { + checkPattern(node); + return data; + } + + @Override + public Object visit(ASTNRNode node, Object data) { + checkPattern(node); + return data; + } + + @Override + public Object visit(ASTFunctionNode node, Object data) { + JexlArgumentDescriptor descriptor = JexlFunctionArgumentDescriptorFactory.F.getArgumentDescriptor(node); + if (descriptor == null) { + throw new IllegalStateException("Could not get descriptor for ASTFunctionNode"); + } + + // Determine which evaluation function to use based of whether the function accepts pattern or not. + Consumer evaluationFunction = descriptor.regexArguments() ? this::checkPattern : this::checkLiteral; + + FunctionJexlNodeVisitor functionVisitor = new FunctionJexlNodeVisitor(); + functionVisitor.visit(node, null); + List args = functionVisitor.args(); + args.stream().filter(arg -> arg instanceof ASTStringLiteral).forEach(evaluationFunction); + + return data; + } + + // Check the given node for a literal string with unescaped characters. + private void checkLiteral(JexlNode node) { + checkValue(node, this.literalMap, this.literalExceptions, this.escapedWhitespaceRequiredForLiterals); + } + + // Check the given node for a pattern value with unescaped characters. + private void checkPattern(JexlNode node) { + checkValue(node, this.patternMap, this.patternExceptions, this.escapedWhitespaceRequiredForPatterns); + } + + // Check the given node for unescaped characters, using the given list of exceptions, and whether whitespace characters must be escaped, and add them to the + // specified map. + private void checkValue(JexlNode node, Map> map, Set exceptions, boolean escapedWhitespaceRequired) { + Object literalValue; + // Catch the situation where no literal was given, e.g. FIELD1 !~ FIELD2. + try { + literalValue = JexlASTHelper.getLiteralValue(node); + } catch (Exception e) { + return; + } + + if (literalValue != null && String.class.equals(literalValue.getClass())) { + String literalString = (String) literalValue; + // Check if we have already examined this string before. + if (map.containsKey(literalString)) { + return; + } + + Set characters = getUnescapedSpecialChars(literalString, exceptions, escapedWhitespaceRequired); + map.put(literalString, characters); + } + } + + private Set getUnescapedSpecialChars(String str, Set allowedSpecialCharacters, boolean escapedWhitespaceRequired) { + if (str.isEmpty()) { + return Collections.emptySet(); + } + + // Maintain insertion order. 
+ Set unescapedChars = new LinkedHashSet<>(); + int prevIndex = 0; + char[] chars = str.toCharArray(); + int totalChars = chars.length; + int lastIndex = totalChars - 1; + boolean isPrevBackslash = false; + + // Examine each character in the string. + for (int currIndex = 0; currIndex < totalChars; currIndex++) { + char currChar = chars[currIndex]; + if (currChar == BACKSLASH_CHAR) { + // If the previous character was a backslash, then this is an escaped backslash. Reset the isPrevBackslash flag and proceed to the next + // character. + if (isPrevBackslash) { + isPrevBackslash = false; + } else { + // If we have characters remaining, this backlash escapes the next character. + if (currIndex != lastIndex) { + isPrevBackslash = true; + } else { + // If this is the last character, it is an unescaped backslash. Treat it as invalid if it is not part of the allowed special characters. + if (!allowedSpecialCharacters.contains(currChar)) { + unescapedChars.add(currChar); + } + } + } + } else if (Character.isLetterOrDigit(currChar) || allowedSpecialCharacters.contains(currChar)) { + // The current character is a letter, digit, or one of the specified special char exceptions. + isPrevBackslash = false; + } else if (Character.isWhitespace(currChar)) { + // The current character is a whitespace. If escaped whitespace characters are required, and the previous character was not a backslash, track + // the + if (escapedWhitespaceRequired && !isPrevBackslash) { + unescapedChars.add(currChar); + } + isPrevBackslash = false; + } else { + // The current character is a special character that is not allowed to be unescaped. + if (!isPrevBackslash) { + // The character was not escaped by a backlash. Retain the character.. + unescapedChars.add(currChar); + } + isPrevBackslash = false; + } + } + return unescapedChars; + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Exclude.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Exclude.java index 3ef8476bae0..e7c3ea65329 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Exclude.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Exclude.java @@ -4,8 +4,11 @@ import datawave.query.search.WildcardFieldedFilter; public class Exclude extends AbstractEvaluationPhaseFunction { + + public static final String FUNCTION_NAME = "exclude"; + public Exclude() { - super("exclude"); + super(FUNCTION_NAME); } @Override diff --git a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Include.java b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Include.java index 93ffaff0898..d2f733ab2b1 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Include.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/functions/jexl/Include.java @@ -4,8 +4,11 @@ import datawave.query.search.WildcardFieldedFilter; public class Include extends AbstractEvaluationPhaseFunction { + + public static final String FUNCTION_NAME = "include"; + public Include() { - super("include"); + super(FUNCTION_NAME); } @Override diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/QueryParser.java b/warehouse/query-core/src/main/java/datawave/query/language/parser/QueryParser.java index cb92641476d..7793be11c5c 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/parser/QueryParser.java +++ 
b/warehouse/query-core/src/main/java/datawave/query/language/parser/QueryParser.java @@ -1,5 +1,7 @@ package datawave.query.language.parser; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; + import datawave.query.language.tree.QueryNode; public interface QueryParser { diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/jexl/JexlControlledQueryParser.java b/warehouse/query-core/src/main/java/datawave/query/language/parser/jexl/JexlControlledQueryParser.java index 6b21794f582..f450a286839 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/parser/jexl/JexlControlledQueryParser.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/parser/jexl/JexlControlledQueryParser.java @@ -10,6 +10,7 @@ import org.apache.commons.jexl3.parser.ASTIdentifier; import org.apache.commons.jexl3.parser.JexlNode; import org.apache.commons.lang.StringUtils; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; import datawave.query.jexl.JexlASTHelper; import datawave.query.language.parser.ParseException; diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/jexl/LuceneToJexlQueryParser.java b/warehouse/query-core/src/main/java/datawave/query/language/parser/jexl/LuceneToJexlQueryParser.java index 0261e2f4239..8b3708bd964 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/parser/jexl/LuceneToJexlQueryParser.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/parser/jexl/LuceneToJexlQueryParser.java @@ -7,10 +7,13 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; import org.apache.lucene.queryparser.flexible.core.builders.QueryBuilder; import org.apache.lucene.queryparser.flexible.core.config.ConfigurationKey; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor; import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; +import org.checkerframework.checker.units.qual.A; import datawave.ingest.data.tokenize.StandardAnalyzer; import datawave.query.language.builder.jexl.JexlTreeBuilder; @@ -18,12 +21,13 @@ import datawave.query.language.parser.ParseException; import datawave.query.language.parser.QueryParser; import datawave.query.language.parser.lucene.AccumuloSyntaxParser; +import datawave.query.language.parser.lucene.LuceneSyntaxQueryParser; import datawave.query.language.parser.lucene.QueryConfigHandler; import datawave.query.language.processor.lucene.QueryNodeProcessorFactory; import datawave.query.language.tree.QueryNode; import datawave.query.language.tree.ServerHeadNode; -public class LuceneToJexlQueryParser implements QueryParser { +public class LuceneToJexlQueryParser implements LuceneSyntaxQueryParser { private static final String[] DEFAULT_TOKENIZED_FIELDS = {"TOKFIELD"}; private static final String[] DEFAULT_SKIP_TOKENIZE_UNFIELDED_FIELDS = {"NOTOKEN"}; @@ -61,23 +65,14 @@ public QueryNode parse(String query) throws ParseException { } public JexlNode convertToJexlNode(String query) throws ParseException { - query = query.replaceAll("\\u0093", "\""); // replace open smart quote 147 - query = query.replaceAll("\\u0094", "\""); // replace close smart quote 148 - - query = query.replaceAll("\\u201c", "\""); // replace open left double quote - query = query.replaceAll("\\u201d", "\""); // replace close 
right double quote - JexlNode parsedQuery = null; try { - Locale.setDefault(Locale.US); - AccumuloSyntaxParser syntaxParser = new AccumuloSyntaxParser(); - syntaxParser.enable_tracing(); + org.apache.lucene.queryparser.flexible.core.nodes.QueryNode queryTree = parseToLuceneQueryNode(query); QueryNodeProcessor processor = getQueryNodeProcessor(); QueryBuilder builder = new JexlTreeBuilder(allowedFunctions); - org.apache.lucene.queryparser.flexible.core.nodes.QueryNode queryTree = syntaxParser.parse(query, ""); queryTree = processor.process(queryTree); parsedQuery = (JexlNode) builder.build(queryTree); } catch (Exception e) { @@ -86,6 +81,20 @@ public JexlNode convertToJexlNode(String query) throws ParseException { return parsedQuery; } + @Override + public org.apache.lucene.queryparser.flexible.core.nodes.QueryNode parseToLuceneQueryNode(String query) throws QueryNodeParseException { + query = query.replaceAll("\\u0093", "\""); // replace open smart quote 147 + query = query.replaceAll("\\u0094", "\""); // replace close smart quote 148 + + query = query.replaceAll("\\u201c", "\""); // replace open left double quote + query = query.replaceAll("\\u201d", "\""); // replace close right double quote + + Locale.setDefault(Locale.US); + AccumuloSyntaxParser syntaxParser = new AccumuloSyntaxParser(); + syntaxParser.enable_tracing(); + return syntaxParser.parse(query, ""); + } + private QueryNodeProcessor getQueryNodeProcessor() { QueryConfigHandler queryConfigHandler = new QueryConfigHandler(); @@ -192,4 +201,5 @@ public List getAllowedFunctions() { public void setAllowedFunctions(List allowedFunctions) { this.allowedFunctions = allowedFunctions; } + } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/LuceneQueryParser.java b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/LuceneQueryParser.java index 46c9d2a41d5..8572d098c21 100644 --- a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/LuceneQueryParser.java +++ b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/LuceneQueryParser.java @@ -12,14 +12,15 @@ import org.apache.log4j.Logger; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; import org.apache.lucene.queryparser.flexible.core.builders.QueryBuilder; import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor; import datawave.query.language.builder.lucene.AccumuloQueryTreeBuilder; import datawave.query.language.functions.lucene.LuceneQueryFunction; import datawave.query.language.parser.ParseException; -import datawave.query.language.parser.QueryParser; import datawave.query.language.processor.lucene.CustomQueryNodeProcessorPipeline; import datawave.query.language.tree.FunctionNode; import datawave.query.language.tree.HardAndNode; @@ -30,22 +31,17 @@ import datawave.query.search.RangeFieldedTerm; import datawave.query.search.Term; -public class LuceneQueryParser implements QueryParser { +public class LuceneQueryParser implements LuceneSyntaxQueryParser { private static Logger log = Logger.getLogger(LuceneQueryParser.class.getName()); private Map filters = new HashMap<>(); private List allowedFunctions = null; @Override public datawave.query.language.tree.QueryNode parse(String query) throws ParseException { - query = 
query.replaceAll("\\u0093", "\""); // replace open smart quote 147 - query = query.replaceAll("\\u0094", "\""); // replace close smart quote 148 - datawave.query.language.tree.QueryNode parsedQuery = null; try { - Locale.setDefault(Locale.US); - AccumuloSyntaxParser syntaxParser = new AccumuloSyntaxParser(); - syntaxParser.enable_tracing(); + QueryNode queryTree = parseToLuceneQueryNode(query); org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler queryConfigHandler = new QueryConfigHandler(); QueryNodeProcessor processor = new CustomQueryNodeProcessorPipeline(queryConfigHandler); @@ -56,7 +52,6 @@ public datawave.query.language.tree.QueryNode parse(String query) throws ParseEx builder = new AccumuloQueryTreeBuilder(allowedFunctions); } - QueryNode queryTree = syntaxParser.parse(query, ""); queryTree = processor.process(queryTree); parsedQuery = (datawave.query.language.tree.QueryNode) builder.build(queryTree); @@ -78,6 +73,17 @@ public datawave.query.language.tree.QueryNode parse(String query) throws ParseEx return parsedQuery; } + @Override + public QueryNode parseToLuceneQueryNode(String query) throws QueryNodeParseException { + query = query.replaceAll("\\u0093", "\""); // replace open smart quote 147 + query = query.replaceAll("\\u0094", "\""); // replace close smart quote 148 + + Locale.setDefault(Locale.US); + AccumuloSyntaxParser parser = new AccumuloSyntaxParser(); + parser.enable_tracing(); + return parser.parse(query, ""); + } + public Map getFilters() { return filters; } diff --git a/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/LuceneSyntaxQueryParser.java b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/LuceneSyntaxQueryParser.java new file mode 100644 index 00000000000..fd8b7af95b9 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/language/parser/lucene/LuceneSyntaxQueryParser.java @@ -0,0 +1,11 @@ +package datawave.query.language.parser.lucene; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +import datawave.query.language.parser.QueryParser; + +public interface LuceneSyntaxQueryParser extends QueryParser { + + QueryNode parseToLuceneQueryNode(String query) throws QueryNodeParseException; +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/AmbigiousNotVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/AmbigiousNotVisitor.java new file mode 100644 index 00000000000..32708804901 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/AmbigiousNotVisitor.java @@ -0,0 +1,47 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NotBooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +/** + * A visitor that checks a query for any usage of NOT with OR'd/AND'd terms before it that are not wrapped, e.g. {@code FIELD1:abc OR FIELD2:def NOT FIELD3:123} + * should be {@code (FIELD1:abc OR FIELD2:def) NOT FIELD3:123}. + */ +public class AmbigiousNotVisitor extends BaseVisitor { + + /** + * Returns a list of copies of any {@link NotBooleanQueryNode} instances in the given query that contain multiple unwrapped preceding terms. 
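// ---- Editor's note: illustrative usage sketch for AmbigiousNotVisitor, not part of this patch.
// The query string comes from the class javadoc above; parseToLuceneQueryNode (added in this patch)
// may throw a QueryNodeParseException.
    LuceneSyntaxQueryParser parser = new LuceneToJexlQueryParser();
    QueryNode tree = parser.parseToLuceneQueryNode("FIELD1:abc OR FIELD2:def NOT FIELD3:123");
    List ambiguousNots = AmbigiousNotVisitor.check(tree);
// A non-empty list means a NOT was preceded by multiple unwrapped terms; wrapping them, as in
// (FIELD1:abc OR FIELD2:def) NOT FIELD3:123, resolves the ambiguity.
// ---- end of editor's note. ----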
+ * + * @param node + * the node + * @return the list of NOT node copies + */ + public static List check(QueryNode node) { + AmbigiousNotVisitor visitor = new AmbigiousNotVisitor(); + return (List) visitor.visit(node, new ArrayList()); + } + + @Override + public Object visit(NotBooleanQueryNode node, Object data) { + for (QueryNode child : node.getChildren()) { + QueryNodeType type = QueryNodeType.get(child.getClass()); + switch (type) { + case OR: + case AND: { + // If we see an OR or AND instead of GROUP, then we have multiple unwrapped terms preceding the NOT. Return a copy of this node. + ((List) data).add((NotBooleanQueryNode) copy(node)); + } + case MODIFIER: { + break; + } + default: + continue; + } + } + return data; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/AmbiguousUnfieldedTermsVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/AmbiguousUnfieldedTermsVisitor.java new file mode 100644 index 00000000000..0c1a6f17810 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/AmbiguousUnfieldedTermsVisitor.java @@ -0,0 +1,273 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +public class AmbiguousUnfieldedTermsVisitor extends BaseVisitor { + + public enum JUNCTION { + AND(QueryNodeType.AND, AndQueryNode::new), OR(QueryNodeType.OR, OrQueryNode::new); + + private final QueryNodeType type; + private final Function,QueryNode> constructor; + + JUNCTION(QueryNodeType type, Function,QueryNode> constructor) { + this.type = type; + this.constructor = constructor; + } + + public QueryNodeType getType() { + return type; + } + + public QueryNode getNewInstance(List children) { + return constructor.apply(children); + } + } + + /** + * Returns a list of copies of nodes representing fielded terms with unfielded terms directly following them that are conjoined by the specified junction. + * + * @param node + * the node + * @param junction + * the junction type AND/OR + * @return the list of ambiguous nodes + */ + public static List check(QueryNode node, JUNCTION junction) { + AmbiguousUnfieldedTermsVisitor visitor = new AmbiguousUnfieldedTermsVisitor(junction); + return (List) visitor.visit(node, new ArrayList()); + } + + private final JUNCTION junction; + + private AmbiguousUnfieldedTermsVisitor(JUNCTION junction) { + this.junction = junction; + } + + @Override + public Object visit(AndQueryNode node, Object data) { + return this.junction == JUNCTION.AND ? checkJunction(node, data) : super.visit(node, data); + } + + @Override + public Object visit(OrQueryNode node, Object data) { + return this.junction == JUNCTION.OR ? checkJunction(node, data) : super.visit(node, data); + } + + @Override + public Object visit(GroupQueryNode node, Object data) { + // If the group node consists entirely of a single fielded term with ambiguously ORed unfielded phrases, add a copy of the group node to the data. + if (groupConsistsOfUnfieldedTerms(node, false)) { + ((List) data).add(copy(node)); + return data; + } else { + // Otherwise, examine the children. 
+ return super.visit(node, data); + } + } + + /** + * Checks the given junction (AND/OR) node for any unfielded terms directly following a fielded term. + * + * @param node + * the node + * @param data + * the data + * @return the updated data + */ + private Object checkJunction(QueryNode node, Object data) { + // The list of linked ambiguous phrases. + List ambiguousPhrases = null; + // The first fielded term that should be part of the list above. + QueryNode fieldedTerm = null; + + // Examine each child of the OR. + for (QueryNode child : node.getChildren()) { + QueryNodeType type = QueryNodeType.get(child.getClass()); + switch (type) { + // The current child is a FIELD. + case FIELD: + // The current child is an unfielded term. + if (((FieldQueryNode) child).getFieldAsString().isEmpty()) { + // If we have found a fielded term in the preceding terms, the child is part of the current set of ambiguous phrases. + if (fieldedTerm != null) { + // Ensure the list of ambiguous phrases is initialized with a copy of the fielded term as the first element. + if (ambiguousPhrases == null) { + ambiguousPhrases = new ArrayList<>(); + ambiguousPhrases.add(copy(fieldedTerm)); + } + // Add a copy of the unfielded terms. + ambiguousPhrases.add(copy(child)); + } + // The current child is a fielded term. + } else { + // We are already tracking a fielded term. + if (fieldedTerm != null) { + // The current child is a new fielded term. If we found ambiguous phrases in the preceding terms, add a new OR node with the phrases + // to the data and reset the list. + if (ambiguousPhrases != null) { + ((List) data).add(junction.getNewInstance(ambiguousPhrases)); + ambiguousPhrases = null; + } + } + // Update the fielded term. + fieldedTerm = child; + } + break; + // The current child is a GROUP. + case GROUP: + // We have previously found a fielded term that may be the start of ambiguous phrases. + if (fieldedTerm != null) { + // Check if the group consists solely of unfielded OR'd phrases. + if (groupConsistsOfUnfieldedTerms((GroupQueryNode) child, true)) { + // It does. Ensure the list of ambiguous phrases is initialized with a copy of the fielded term as the first element. + if (ambiguousPhrases == null) { + ambiguousPhrases = new ArrayList<>(); + ambiguousPhrases.add(copy(fieldedTerm)); + } + // Add a copy of the group. + ambiguousPhrases.add(copy(child)); + } else { + // The group does not consist solely of unfielded OR'd phrases. If we found ambiguous phrases in the preceding terms, add a new + // OR node with the phrases to the data. Reset the list and fielded term. + if (ambiguousPhrases != null) { + ((List) data).add(junction.getNewInstance(ambiguousPhrases)); + ambiguousPhrases = null; + } + fieldedTerm = null; + // Examine the children of the GROUP node + super.visit(child, data); + } + } else { + // Check if the group consists solely of a fielded term followed by unfielded OR'd phrases. + if (groupConsistsOfUnfieldedTerms((GroupQueryNode) child, false)) { + // If it does, add a copy of it to the data. + ((List) data).add(copy(child)); + } else { + // Otherwise, examine the children of the GROUP node. + super.visit(child, data); + } + } + break; + default: + // If the child is any type other than a GROUP or FIELD, then this is the end of any previously found ambiguous phrases. Add a new OR node + // with the previously found phrases to the data, and then reset the list and fielded term. 
+ if (ambiguousPhrases != null) { + ((List) data).add(junction.getNewInstance(ambiguousPhrases)); + ambiguousPhrases = null; + } + fieldedTerm = null; + // Examine the children of the child. + super.visit(child, data); + break; + } + } + + // If we have a list of ambiguous phrases after examining all the children, add a new OR node to the data. + if (ambiguousPhrases != null) { + ((List) data).add(junction.getNewInstance(ambiguousPhrases)); + } + + return data; + } + + /** + * Return whether the given {@link GroupQueryNode} consists entirely of ambiguously ORed unfielded phrases. + * + * @param node + * the group node + * @param fieldedTermFound + * whether a fielded term has already been found + * @return true if the group node consists of ambiguously ORed phrases, or false otherwise + */ + private boolean groupConsistsOfUnfieldedTerms(GroupQueryNode node, boolean fieldedTermFound) { + // A GROUP node will have just one child. + QueryNode child = node.getChild(); + QueryNodeType type = QueryNodeType.get(child.getClass()); + if (type == QueryNodeType.GROUP) { + // The child is a nested group. Examine it. + return groupConsistsOfUnfieldedTerms((GroupQueryNode) child, fieldedTermFound); + } else if (type == junction.getType()) { + // The child is an OR. Examine the OR's children. + return junctionConsistsOfUnfieldedTerms(child, fieldedTermFound); + } else if (type == QueryNodeType.FIELD) { + // If the child is a single field term, return true if it is unfielded and we have found a fieldedTerm. Otherwise, return false. + return fieldedTermFound && ((FieldQueryNode) child).getFieldAsString().isEmpty(); + } else { + // The child is not one of the target types we want.. + return false; + } + } + + /** + * Return whether the given {@link OrQueryNode} consists entirely of ambiguously ORed unfielded phrases. + * + * @param node + * the OR node + * @param fieldedTermFound + * whether a fielded term has already been found. + * @return true if the OR node consists of ambiguously ORed phrases, or false otherwise + */ + private boolean junctionConsistsOfUnfieldedTerms(QueryNode node, boolean fieldedTermFound) { + List children = node.getChildren(); + boolean unfieldedTermsFound = false; + boolean fieldTermFoundInGroupSibling = false; + // Examine the children. + for (QueryNode child : children) { + QueryNodeType type = QueryNodeType.get(child.getClass()); + // If the child is a group, check if it consists of ambiguously ORed phrases. + if (type == QueryNodeType.GROUP) { + // If we found the field term specifically in a previous GROUP sibling, the top-level group cannot consist of ambigously ORed unfielded phrases. + // Instead, we have something like ((FOO:abc OR def) OR (aaa OR bbb)) which cannot be flattened to FOO:(abc OR def OR aaa OR bbb). + if (fieldTermFoundInGroupSibling) { + return false; + } + if (groupConsistsOfUnfieldedTerms((GroupQueryNode) child, fieldedTermFound)) { + // If it does, we know the group is something like one of the following: + // (FOO:abc OR def). + // (abc OR def OR ghi) + if (!fieldedTermFound) { + fieldedTermFound = true; + fieldTermFoundInGroupSibling = true; + } + unfieldedTermsFound = true; + } else { + // If it does not, the top-level group does not consist solely of ambiguous phrases. + return false; + } + } else if (type == QueryNodeType.FIELD) { + // If the child is a field term, check if it is fielded or unfielded. 
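// ---- Editor's note: illustrative usage sketch for AmbiguousUnfieldedTermsVisitor, not part of this
// patch; the query string and field names are hypothetical.
    QueryNode tree = new LuceneToJexlQueryParser().parseToLuceneQueryNode("FOO:abc OR def OR ghi");
    List ambiguous = AmbiguousUnfieldedTermsVisitor.check(tree, AmbiguousUnfieldedTermsVisitor.JUNCTION.OR);
// The unfielded terms def and ghi directly follow FOO:abc under the OR, so a copy of that junction is
// returned; the unambiguous forms would be FOO:abc OR FOO:def OR FOO:ghi, or FOO:(abc OR def OR ghi).
// ---- end of editor's note. ----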
+ if (!((FieldQueryNode) child).getFieldAsString().isEmpty()) { + // If the field name is not empty, and we have not found a fielded term yet, mark that we've found one. + if (!fieldedTermFound) { + fieldedTermFound = true; + } else { + // If a fielded term was found previously, then we have may something like (FOO:abc OR BAR:abc). + return false; + } + } else { + // The current child is an unfielded term. If no fielded term has been found yet, then we may have something like (abc OR FOO:abc). + if (!fieldedTermFound) { + return false; + } else { + // Otherwise, mark that we've found an unfielded term. + unfieldedTermsFound = true; + } + } + } else { + return false; + } + } + // Return whether we found at least one unfielded term following the fielded term like (FOO:abc OR def). + return unfieldedTermsFound; + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/BaseVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/BaseVisitor.java new file mode 100644 index 00000000000..6c5a8d4a319 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/BaseVisitor.java @@ -0,0 +1,301 @@ +package datawave.query.lucene.visitors; + +import java.util.List; +import java.util.Map; + +import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.AnyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.DeletedQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FunctionQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchAllDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchNoDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NotBooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OpaqueQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PathQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PhraseSlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ProximityQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; +import org.apache.lucene.queryparser.flexible.standard.nodes.AbstractRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.BooleanModifierNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode; +import 
org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.SynonymQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode; +import org.apache.lucene.search.WildcardQuery; + +import datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; + +public class BaseVisitor { + + public static QueryNode copy(QueryNode node) { + try { + return node.cloneTree(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + } + + public Object visit(QueryNode node, Object data) { + // The class QueryNode does not have an accept method to support the visitor pattern. The switch below is a workaround to ensure that we call the + // correct visit() method based on the node's type. + QueryNodeType type = QueryNodeType.get(node.getClass().getName()); + if (type != null) { + switch (type) { + case AND: + return visit((AndQueryNode) node, data); + case ANY: + return visit((AnyQueryNode) node, data); + case FIELD: + return visit((FieldQueryNode) node, data); + case BOOLEAN: + return visit((BooleanQueryNode) node, data); + case BOOST: + return visit((BoostQueryNode) node, data); + case FUZZY: + return visit((FuzzyQueryNode) node, data); + case GROUP: + return visit((GroupQueryNode) node, data); + case MATCH_ALL_DOCS: + return visit((MatchAllDocsQueryNode) node, data); + case MATCH_NO_DOCS: + return visit((MatchNoDocsQueryNode) node, data); + case MODIFIER: + return visit((ModifierQueryNode) node, data); + case NO_TOKEN_FOUND: + return visit((NoTokenFoundQueryNode) node, data); + case OPAQUE: + return visit((OpaqueQueryNode) node, data); + case OR: + return visit((OrQueryNode) node, data); + case PATH: + return visit((PathQueryNode) node, data); + case PHRASE_SLOP: + return visit((PhraseSlopQueryNode) node, data); + case PROXIMITY: + return visit((ProximityQueryNode) node, data); + case QUOTED_FIELD: + return visit((QuotedFieldQueryNode) node, data); + case SLOP: + return visit((SlopQueryNode) node, data); + case TOKENIZED_PHRASE: + return visit((TokenizedPhraseQueryNode) node, data); + case ABSTRACT_RANGE: + return visit((AbstractRangeQueryNode) node, data); + case BOOLEAN_MODIFIER: + return visit((BooleanModifierNode) node, data); + case MULTI_PHRASE: + return visit((MultiPhraseQueryNode) node, data); + case POINT: + return visit((PointQueryNode) node, data); + case POINT_RANGE: + return visit((PointRangeQueryNode) node, data); + case PREFIX_WILDCARD: + return visit((PrefixWildcardQueryNode) node, data); + case REGEX: + return visit((RegexpQueryNode) node, data); + case SYNONYM: + return visit((SynonymQueryNode) node, data); + case TERM_RANGE: + return visit((TermRangeQueryNode) node, data); + case WILDCARD: + return visit((WildcardQueryNode) node, data); + case FUNCTION: + return visit((FunctionQueryNode) node, data); + case NOT_BOOLEAN: + return visit((NotBooleanQueryNode) node, data); + case DELETED: + return visit((DeletedQueryNode) node, data); + default: + throw new UnsupportedOperationException("No visit() method defined for " + QueryNodeType.class.getSimpleName() + " " + type); + } + } else { + throw new UnsupportedOperationException("No " + QueryNodeType.class.getSimpleName() + " constant defined for class " + node.getClass()); + } + } + + public Object visit(AndQueryNode node, Object data) { + 
visitChildren(node, data); + return data; + } + + public Object visit(AnyQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(FieldQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(BooleanQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(BoostQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(DeletedQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(FuzzyQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(GroupQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(MatchAllDocsQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(MatchNoDocsQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(ModifierQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(NoTokenFoundQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(OpaqueQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(OrQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(PathQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(PhraseSlopQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(ProximityQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(QuotedFieldQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(SlopQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(TokenizedPhraseQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(AbstractRangeQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(BooleanModifierNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(MultiPhraseQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(PointQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(PointRangeQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(PrefixWildcardQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(RegexpQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(SynonymQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(TermRangeQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(WildcardQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(FunctionQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + public Object visit(NotBooleanQueryNode node, Object data) { + visitChildren(node, data); + return data; + } + + protected void visitChildren(QueryNode node, Object data) { 
+ List children = node.getChildren(); + if (children != null) { + for (QueryNode child : children) { + visit(child, data); + } + } + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidIncludeExcludeArgsVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidIncludeExcludeArgsVisitor.java new file mode 100644 index 00000000000..b326fd58d81 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidIncludeExcludeArgsVisitor.java @@ -0,0 +1,123 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import org.apache.lucene.queryparser.flexible.core.nodes.FunctionQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +import datawave.query.language.functions.jexl.Exclude; +import datawave.query.language.functions.jexl.Include; + +/** + * A {@link BaseVisitor} implementation that will check any {@code #INCLUDE} or {@code #EXCLUDE} functions in a query for invalid arguments. + */ +public class InvalidIncludeExcludeArgsVisitor extends BaseVisitor { + + private static final String OR = "or"; + private static final String AND = "and"; + + public enum REASON { + /** + * No arguments were supplied for the function, e.g. {@code #INCLUDE()}. + */ + NO_ARGS, + /** + * Uneven field/value pairings were supplied for the function, e.g. {@code #INCLUDE(FIELD1, value, FIELD2)}. + */ + UNEVEN_ARGS, + /** + * The first argument was "or" or "and", and no field/value pairs were supplied afterwards, e.g. {@code #INCLUDE(OR)}. + */ + NO_ARGS_AFTER_BOOLEAN, + /** + * The first argument was "or" or "and", and uneven field/value pairings were supplied afterwards, e.g. {@code #INCLUDE(OR, value)}. + */ + UNEVEN_ARGS_AFTER_BOOLEAN + } + + public static List check(QueryNode node) { + InvalidIncludeExcludeArgsVisitor visitor = new InvalidIncludeExcludeArgsVisitor(); + List invalidFunctions = new ArrayList<>(); + visitor.visit(node, invalidFunctions); + return invalidFunctions; + } + + @Override + public Object visit(FunctionQueryNode node, Object data) { + String name = node.getFunction(); + if (name.equalsIgnoreCase(Include.FUNCTION_NAME) || name.equalsIgnoreCase(Exclude.FUNCTION_NAME)) { + List args = node.getParameterList(); + if (!args.isEmpty()) { + String firstArg = args.get(0); + // The first argument is a boolean. + if (firstArg.equalsIgnoreCase(OR) || firstArg.equalsIgnoreCase(AND)) { + // No arguments were supplied after the boolean. + if (args.size() == 1) { + ((List) data).add(new InvalidFunction(name, args, REASON.NO_ARGS_AFTER_BOOLEAN)); + // Uneven field/value pairs were supplied after the boolean. + } else if (args.size() % 2 == 0) { + ((List) data).add(new InvalidFunction(name, args, REASON.UNEVEN_ARGS_AFTER_BOOLEAN)); + } + } else if (args.size() % 2 == 1) { + // Uneven field/value pairs were supplied. + ((List) data).add(new InvalidFunction(name, args, REASON.UNEVEN_ARGS)); + } + } else { + // No arguments were supplied. Currently the AccumuloSyntaxParser throws an exception when attempting to parse #INCLUDE() or #EXCLUDE(), so + // theoretically the args should never be empty. Put here in case that ever changes. 
+ ((List) data).add(new InvalidFunction(name, args, REASON.NO_ARGS)); + } + } + return data; + } + + public static class InvalidFunction { + private final String name; + private final List args; + private final REASON reason; + + public InvalidFunction(String name, List args, REASON reason) { + this.name = name; + this.args = args; + this.reason = reason; + } + + public String getName() { + return name; + } + + public List getArgs() { + return args; + } + + public REASON getReason() { + return reason; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + InvalidFunction that = (InvalidFunction) object; + return Objects.equals(name, that.name) && Objects.equals(args, that.args) && reason == that.reason; + } + + @Override + public int hashCode() { + return Objects.hash(name, args, reason); + } + + @Override + public String toString() { + return new StringJoiner(", ", InvalidFunction.class.getSimpleName() + "[", "]").add("name='" + name + "'").add("args=" + args) + .add("reason=" + reason).toString(); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidQuoteVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidQuoteVisitor.java new file mode 100644 index 00000000000..4d386da8b45 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidQuoteVisitor.java @@ -0,0 +1,58 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FunctionQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +/** + * A visitor that will check a query for any quoted phrases that have the quote character {@value INVALID_QUOTE} instead of {@code '} + */ +public class InvalidQuoteVisitor extends BaseVisitor { + + public static final Character INVALID_QUOTE = '`'; + + /** + * Returns a list of copies of any phrases in the query that uses the quote character {@value INVALID_QUOTE} at either end instead of {@code '}. This also + * applies to any parameters in functions. + * + * @param query + * the query to examine + * @return a list of copies of phrases with invalid quotes + */ + public static List check(QueryNode query) { + InvalidQuoteVisitor visitor = new InvalidQuoteVisitor(); + return (List) visitor.visit(query, new ArrayList()); + } + + @Override + public Object visit(FieldQueryNode node, Object data) { + String text = node.getTextAsString(); + // Check if the string either starts with or ends with the invalid quote character. + if (containsInvalidQuote(text)) { + ((List) data).add(copy(node)); + } + return data; + } + + @Override + public Object visit(FunctionQueryNode node, Object data) { + // Check if any of the function arguments have invalid quotes. 
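+ // For example, a parameter quoted with backticks, such as in #INCLUDE(FIELD, `value`), should be reported.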
+ for (String arg : node.getParameterList()) { + if (containsInvalidQuote(arg)) { + ((List) data).add(copy(node)); + break; + } + } + return data; + } + + private boolean containsInvalidQuote(String text) { + return !text.isEmpty() && (text.charAt(0) == INVALID_QUOTE || (text.charAt(text.length() - 1) == INVALID_QUOTE)); + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidSlopProximityVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidSlopProximityVisitor.java new file mode 100644 index 00000000000..254a2219013 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/InvalidSlopProximityVisitor.java @@ -0,0 +1,88 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; + +/** + * An visitor that checks a query for any slop phrases whose value is not at least the number of terms present. + */ +public class InvalidSlopProximityVisitor extends BaseVisitor { + + /** + * Returns copies of each {@link SlopQueryNode} that has a value that is less than the number of terms. + * + * @param node + * the node + * @return the copies + */ + public static List check(QueryNode node) { + InvalidSlopProximityVisitor visitor = new InvalidSlopProximityVisitor(); + List list = new ArrayList<>(); + visitor.visit(node, list); + return list; + } + + @Override + public Object visit(SlopQueryNode node, Object data) { + QuotedFieldQueryNode phrase = (QuotedFieldQueryNode) node.getChild(); + String text = phrase.getTextAsString(); + int totalTerms = getTotalTerms(text); + if (totalTerms > node.getValue()) { + ((List) data).add(new InvalidSlop((SlopQueryNode) copy(node), totalTerms)); + } + + return data; + } + + private int getTotalTerms(String string) { + return string.trim().split("\\s+").length; + } + + public static class InvalidSlop { + private final SlopQueryNode node; + private final int minimum; + + public InvalidSlop(SlopQueryNode node, int minimum) { + this.node = node; + this.minimum = minimum; + } + + public SlopQueryNode getNode() { + return node; + } + + public int getMinimum() { + return minimum; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (!(obj instanceof InvalidSlop)) + return false; + InvalidSlop other = (InvalidSlop) obj; + if (this.hashCode() != other.hashCode()) + return false; + return Objects.equals(this.node, other.node) && Objects.equals(this.getMinimum(), other.getMinimum()); + } + + @Override + public int hashCode() { + return Objects.hash(this.node, this.getMinimum()); + } + + @Override + public String toString() { + return new StringJoiner(", ", InvalidSlop.class.getSimpleName() + "[", "]").add("node=" + node).add("minimum=" + minimum).toString(); + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/LuceneQueryStringBuildingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/LuceneQueryStringBuildingVisitor.java new file mode 100644 index 00000000000..fc1e4bd1a04 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/LuceneQueryStringBuildingVisitor.java @@ -0,0 +1,477 @@ +package 
datawave.query.lucene.visitors; + +import java.util.List; +import java.util.Locale; + +import org.apache.commons.lang3.StringUtils; +import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.AnyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.DeletedQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldValuePairQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FunctionQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.LuceneQueryNodeHelper; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchAllDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchNoDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NotBooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OpaqueQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PathQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PhraseSlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ProximityQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; +import org.apache.lucene.queryparser.flexible.standard.nodes.AbstractRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.BooleanModifierNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.SynonymQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode; + +import datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; + +/** + * A visitor implementation that returns a formatted LUCENE query string from a given QueryNode. This visitor acts as an equivalent to + * {@link QueryNode#toQueryString(EscapeQuerySyntax)} with some differences: + *
+ * <ul>
+ * <li>Unfielded terms such as in {@code 'FOO:abc def'} will be formatted to {@code 'FOO:abc def'} instead of {@code 'FOO:abc :def'}.</li>
+ * <li>The NOT operator such as {@code 'FOO:abc NOT BAR:def'} will be formatted to {@code 'FOO:abc NOT BAR:def'} instead of {@code 'FOO:abc -BAR:def'}.</li>
+ * </ul>
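+ * <p>
+ * For example, calling {@code LuceneQueryStringBuildingVisitor.build(node)} returns the formatted LUCENE string for the given query tree.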
    + */ +public class LuceneQueryStringBuildingVisitor extends BaseVisitor { + + private static final EscapeQuerySyntax escapedSyntax = new EscapeQuerySyntaxImpl(); + + public static String build(QueryNode node) { + LuceneQueryStringBuildingVisitor visitor = new LuceneQueryStringBuildingVisitor(); + return ((StringBuilder) visitor.visit(node, new StringBuilder())).toString(); + } + + @Override + public Object visit(AndQueryNode node, Object data) { + return visitJunctionNode(node, data, " AND "); + } + + @Override + public Object visit(AnyQueryNode node, Object data) { + StringBuilder sb = (StringBuilder) data; + String field = node.getFieldAsString(); + boolean isDefaultField = LuceneQueryNodeHelper.isDefaultField(node, node.getField()); + if (!isDefaultField) { + sb.append(field).append(":("); + } + + sb.append("( "); + sb.append(joinChildren(node, " ")); + sb.append(" )"); + sb.append(" ANY "); + sb.append(node.getMinimumMatchingElements()); + + if (!isDefaultField) { + sb.append(")"); + } + return sb; + } + + @Override + public Object visit(FieldQueryNode node, Object data) { + return visitField(node, data, false); + } + + private Object visitField(FieldQueryNode node, Object data, boolean ignoreField) { + String field = node.getFieldAsString(); + StringBuilder sb = (StringBuilder) data; + boolean isDefaultField = LuceneQueryNodeHelper.isDefaultField(node, node.getField()); + if (!isDefaultField) { + sb.append(field).append(":"); + } + sb.append(escape(node.getText(), Locale.getDefault(), EscapeQuerySyntax.Type.NORMAL)); + return sb; + } + + @Override + public Object visit(BooleanQueryNode node, Object data) { + return visitJunctionNode(node, data, " "); + } + + @Override + public Object visit(BoostQueryNode node, Object data) { + StringBuilder sb = (StringBuilder) data; + QueryNode child = node.getChild(); + if (child != null) { + visit(child, sb); + sb.append("^"); + sb.append(getFloatStr(node.getValue())); + } + return sb; + } + + @Override + public Object visit(DeletedQueryNode node, Object data) { + ((StringBuilder) data).append("[DELETEDCHILD]"); + return data; + } + + @Override + public Object visit(FuzzyQueryNode node, Object data) { + String field = node.getFieldAsString(); + StringBuilder sb = (StringBuilder) data; + boolean isDefaultField = LuceneQueryNodeHelper.isDefaultField(node, node.getField()); + if (!isDefaultField) { + sb.append(field).append(":"); + } + sb.append(escape(node.getText(), Locale.getDefault(), EscapeQuerySyntax.Type.NORMAL)); + sb.append("~").append(node.getSimilarity()); + return sb; + } + + @Override + public Object visit(GroupQueryNode node, Object data) { + QueryNode child = node.getChild(); + if (child != null) { + StringBuilder sb = (StringBuilder) data; + sb.append("( "); + visit(child, sb); + sb.append(" )"); + } + return data; + } + + @Override + public Object visit(MatchAllDocsQueryNode node, Object data) { + ((StringBuilder) data).append("*:*"); + return data; + } + + @Override + public Object visit(MatchNoDocsQueryNode node, Object data) { + // MatchNoDocsQueryNode does not override toQueryString(). Default to behavior of its parent class DeletedQueryNode. 
+ return visit((DeletedQueryNode) node, data); + } + + @Override + public Object visit(ModifierQueryNode node, Object data) { + QueryNode child = node.getChild(); + if (child != null) { + StringBuilder sb = (StringBuilder) data; + ModifierQueryNode.Modifier modifier = node.getModifier(); + if (child instanceof ModifierQueryNode) { + sb.append("("); + sb.append(modifier.toLargeString()); + visit(child, sb); + sb.append(")"); + } else { + sb.append(modifier.toLargeString()); + visit(child, sb); + } + } + return data; + } + + @Override + public Object visit(NoTokenFoundQueryNode node, Object data) { + ((StringBuilder) data).append("[NTF]"); + return data; + } + + @Override + public Object visit(OpaqueQueryNode node, Object data) { + StringBuilder sb = (StringBuilder) data; + sb.append("@"); + sb.append(node.getSchema()); + sb.append(":'"); + sb.append(node.getValue()); + sb.append("'"); + return sb; + } + + @Override + public Object visit(OrQueryNode node, Object data) { + return visitJunctionNode(node, data, " OR "); + } + + @Override + public Object visit(PathQueryNode node, Object data) { + StringBuilder sb = (StringBuilder) data; + sb.append("/").append(node.getFirstPathElement()); + for (PathQueryNode.QueryText element : node.getPathElements(1)) { + sb.append("/\"").append(escape(element.getValue(), Locale.getDefault(), EscapeQuerySyntax.Type.STRING)).append("\""); + } + return sb; + } + + @Override + public Object visit(PhraseSlopQueryNode node, Object data) { + QueryNode child = node.getChild(); + if (child != null) { + StringBuilder sb = (StringBuilder) data; + visit(child, sb); + sb.append("~"); + sb.append(getFloatStr(Float.valueOf(node.getValue()))); + } + return data; + } + + @Override + public Object visit(ProximityQueryNode node, Object data) { + StringBuilder sb = (StringBuilder) data; + String field = node.getFieldAsString(); + boolean isDefaultField = LuceneQueryNodeHelper.isDefaultField(node, node.getField()); + if (!isDefaultField) { + sb.append(field).append(":("); + } + sb.append("( "); + sb.append(joinChildren(node, " ")); + sb.append(" ) "); + ProximityQueryNode.Type proximityType = node.getProximityType(); + switch (proximityType) { + case PARAGRAPH: + sb.append("WITHIN PARAGRAPH"); + break; + case SENTENCE: + sb.append("WITHIN SENTENCE"); + break; + case NUMBER: + sb.append("WITHIN"); + break; + default: + sb.append(""); + } + if (node.getDistance() > -1) { + sb.append(" ").append(node.getDistance()); + } + if (node.isInOrder()) { + sb.append(" INORDER"); + } + if (!isDefaultField) { + sb.append(")"); + } + return sb; + } + + @Override + public Object visit(QuotedFieldQueryNode node, Object data) { + String field = node.getFieldAsString(); + StringBuilder sb = (StringBuilder) data; + boolean isDefaultField = LuceneQueryNodeHelper.isDefaultField(node, node.getField()); + if (!isDefaultField) { + sb.append(field).append(":"); + } + sb.append("\"").append(escape(node.getText(), Locale.getDefault(), EscapeQuerySyntax.Type.STRING)).append("\""); + return sb; + } + + @Override + public Object visit(SlopQueryNode node, Object data) { + QueryNode child = node.getChild(); + if (child != null) { + StringBuilder sb = (StringBuilder) data; + visit(child, sb); + sb.append("~"); + sb.append(getFloatStr(Float.valueOf(node.getValue()))); + } + return data; + } + + @Override + public Object visit(TokenizedPhraseQueryNode node, Object data) { + List children = node.getChildren(); + if (children != null && !children.isEmpty()) { + StringBuilder sb = (StringBuilder) data; + 
sb.append("[TP["); + sb.append(joinChildren(node, ",")); + sb.append("]]"); + } + return data; + } + + @Override + public Object visit(AbstractRangeQueryNode node, Object data) { + StringBuilder sb = (StringBuilder) data; + FieldValuePairQueryNode lowerBound = node.getLowerBound(); + FieldValuePairQueryNode upperBound = node.getUpperBound(); + if (node.isLowerInclusive()) { + sb.append("["); + } else { + sb.append("{"); + } + if (lowerBound != null) { + visit(lowerBound, sb); + } else { + sb.append("..."); + } + sb.append(' '); + if (upperBound != null) { + visit(upperBound, sb); + } else { + sb.append("..."); + } + if (node.isUpperInclusive()) { + sb.append("]"); + } else { + sb.append("}"); + } + return sb; + } + + @Override + public Object visit(BooleanModifierNode node, Object data) { + // BooleanModifierNode does not override toQueryString(). Default to behavior of parent class ModifierQueryNode. + return visit((ModifierQueryNode) node, data); + } + + @Override + public Object visit(MultiPhraseQueryNode node, Object data) { + List children = node.getChildren(); + if (children != null && !children.isEmpty()) { + StringBuilder sb = (StringBuilder) data; + sb.append("[MTP["); + sb.append(joinChildren(node, ",")); + sb.append("]]"); + } + return data; + } + + @Override + public Object visit(PointQueryNode node, Object data) { + String field = node.getField().toString(); + StringBuilder sb = (StringBuilder) data; + boolean isDefaultField = LuceneQueryNodeHelper.isDefaultField(node, node.getField()); + if (!isDefaultField) { + sb.append(field).append(":"); + } + sb.append(escape(node.getNumberFormat().format(node.getValue()), Locale.ROOT, EscapeQuerySyntax.Type.NORMAL)); + return sb; + } + + @Override + public Object visit(PointRangeQueryNode node, Object data) { + // PointRangeQueryNode does not override toQueryString(). Default to behavior of parent class AbstractRangeQueryNode. + return visit((AbstractRangeQueryNode) node, data); + } + + @Override + public Object visit(PrefixWildcardQueryNode node, Object data) { + // PrefixWildcardQueryNode does not override toQueryString(). Default to behavior of parent class WildcardQueryNode. + return visit((WildcardQueryNode) node, data); + } + + @Override + public Object visit(RegexpQueryNode node, Object data) { + String field = node.getField().toString(); + StringBuilder sb = (StringBuilder) data; + boolean isDefaultField = LuceneQueryNodeHelper.isDefaultField(node, node.getField()); + if (!isDefaultField) { + sb.append(field).append(":"); + } + sb.append("/").append(node.getText()).append("/"); + return sb; + } + + @Override + public Object visit(SynonymQueryNode node, Object data) { + // SynonymQueryNode does not override toQueryString(). Default to behavior of parent class BooleanQueryNode. + return visit((BooleanQueryNode) node, data); + } + + @Override + public Object visit(TermRangeQueryNode node, Object data) { + // TermRangeQueryNode does not override toQueryString(). Default to behavior of parent class AbstractRangeQueryNode. 
+ return visit((AbstractRangeQueryNode) node, data); + } + + @Override + public Object visit(WildcardQueryNode node, Object data) { + String field = node.getField().toString(); + StringBuilder sb = (StringBuilder) data; + boolean isDefaultField = LuceneQueryNodeHelper.isDefaultField(node, node.getField()); + if (!isDefaultField) { + sb.append(field).append(":"); + } + sb.append(node.getText()); + return sb; + } + + @Override + public Object visit(FunctionQueryNode node, Object data) { + StringBuilder sb = (StringBuilder) data; + sb.append("#"); + sb.append(node.getFunction()); + sb.append("("); + String filler = ""; + for (String parameter : node.getParameterList()) { + sb.append(filler).append(escape(parameter, Locale.getDefault(), EscapeQuerySyntax.Type.NORMAL)); + filler = ", "; + } + sb.append(")"); + return sb; + } + + @Override + public Object visit(NotBooleanQueryNode node, Object data) { + // NotBooleanQueryNode does not override toQueryString(). Default to behavior of parent class BooleanQueryNode. + return visit((BooleanQueryNode) node, data); + } + + private Object visitJunctionNode(QueryNode node, Object data, String junction) { + StringBuilder sb = (StringBuilder) data; + List children = node.getChildren(); + if (children != null && !children.isEmpty()) { + boolean requiresGrouping = !isRootOrHasParentGroup(node); + if (requiresGrouping) { + sb.append("( "); + } + sb.append(joinChildren(node, junction)); + if (requiresGrouping) { + sb.append(" )"); + } + } + return sb; + } + + private String joinChildren(QueryNode node, String junction) { + List children = node.getChildren(); + if (children != null && !children.isEmpty()) { + StringBuilder sb = new StringBuilder(); + String filler = ""; + for (QueryNode child : children) { + sb.append(filler); + visit(child, sb); + filler = junction; + } + return sb.toString(); + } else { + return ""; + } + } + + private boolean isRootOrHasParentGroup(QueryNode node) { + QueryNode parent = node.getParent(); + return parent == null || parent instanceof GroupQueryNode; + } + + private CharSequence escape(CharSequence text, Locale locale, EscapeQuerySyntax.Type type) { + return escapedSyntax.escape(text, Locale.getDefault(), type); + } + + private String getFloatStr(Float floatValue) { + if (floatValue == floatValue.longValue()) { + return "" + floatValue.longValue(); + } else { + return "" + floatValue; + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/PrintingVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/PrintingVisitor.java new file mode 100644 index 00000000000..72ab692b348 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/PrintingVisitor.java @@ -0,0 +1,325 @@ +package datawave.query.lucene.visitors; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.util.Lists; +import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.AnyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.DeletedQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FunctionQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; +import 
org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchAllDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchNoDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NotBooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OpaqueQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PathQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PhraseSlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ProximityQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.AbstractRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.BooleanModifierNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.SynonymQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode; + +import datawave.query.Constants; + +public class PrintingVisitor extends BaseVisitor { + + public interface Output { + void writeLine(String line); + } + + private static class PrintStreamOutput implements Output { + + private final PrintStream stream; + + private PrintStreamOutput(PrintStream stream) { + this.stream = stream; + } + + @Override + public void writeLine(String line) { + stream.println(line); + } + } + + private static class StringListOutput implements Output { + private final List lines; + + private StringListOutput() { + this.lines = new ArrayList<>(32); + } + + @Override + public void writeLine(String line) { + lines.add(line); + } + + public List getLines() { + return lines; + } + } + + public static List printToList(QueryNode node) { + return ((StringListOutput) printToOutput(node, new StringListOutput())).getLines(); + } + + public static void printToStdOut(QueryNode node) { + printToOutput(node, new PrintStreamOutput(System.out)); + } + + public static void printToStream(QueryNode node, PrintStream output) { + printToOutput(node, new PrintStreamOutput(output)); + } + + public static Output printToOutput(QueryNode node, Output output) { + PrintingVisitor visitor = new PrintingVisitor(output); + visitor.visit(node, Constants.EMPTY_STRING); + return output; + } + + private static final String PREFIX = " "; + + private Output output; + + public PrintingVisitor(Output output) { + this.output = output; + } + + @Override + public Object visit(AndQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + 
public Object visit(AnyQueryNode node, Object data) { + String line = formatProperties(node, "field", node.getFieldAsString(), "minimumMatchingElements", node.getMinimumMatchingElements()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(FieldQueryNode node, Object data) { + String line = formatProperties(node, "begin", node.getBegin(), "end", node.getEnd(), "field", node.getFieldAsString(), "text", node.getTextAsString()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(BooleanQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(BoostQueryNode node, Object data) { + String line = formatProperties(node, "value", node.getValue()); + return super.visit(node, data); + } + + @Override + public Object visit(DeletedQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(FuzzyQueryNode node, Object data) { + String line = formatProperties(node, "field", node.getFieldAsString(), "text", node.getTextAsString(), "similarity", node.getSimilarity()); + return super.visit(node, data); + } + + @Override + public Object visit(GroupQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(MatchAllDocsQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(MatchNoDocsQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(ModifierQueryNode node, Object data) { + String line = formatProperties(node, "modifier", node.getModifier()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(NoTokenFoundQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(OpaqueQueryNode node, Object data) { + String line = formatProperties(node, "schema", node.getSchema(), "value", node.getValue()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(OrQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(PathQueryNode node, Object data) { + List pathElements = node.getPathElements(); + Integer begin = null; + int end = 0; + String path = null; + for (PathQueryNode.QueryText element : pathElements) { + if (begin == null) { + begin = element.getBegin(); + } + end = element.getEnd(); + path = Constants.FORWARD_SLASH + element.getValue(); + } + + String line = formatProperties(node, "begin", begin, "end", end, "path", path); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(PhraseSlopQueryNode node, Object data) { + String line = formatProperties(node, "value", node.getValue()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(ProximityQueryNode node, Object data) { + String line = formatProperties(node, "distance", node.getDistance(), "field", node.getField(), "inorder", node.isInOrder(), "type", + node.getProximityType()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(QuotedFieldQueryNode node, Object data) { + String line = formatProperties(node, "begin", node.getBegin(), "end", node.getEnd(), "field", node.getFieldAsString(), "text", node.getTextAsString()); + return 
writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(SlopQueryNode node, Object data) { + String line = formatProperties(node, "value", node.getValue()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(TokenizedPhraseQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(AbstractRangeQueryNode node, Object data) { + String line = formatProperties(node, "lowerInclusive", node.isLowerInclusive(), "upperInclusive", node.isUpperInclusive()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(BooleanModifierNode node, Object data) { + String line = formatProperties(node, "modifier", node.getModifier()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(MultiPhraseQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(PointQueryNode node, Object data) { + String line = formatProperties(node, "field", node.getField(), "number", node.getNumberFormat().format(node.getValue())); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(PointRangeQueryNode node, Object data) { + String line = formatProperties(node, "lowerInclusive", node.isLowerInclusive(), "upperInclusive", node.isUpperInclusive()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(PrefixWildcardQueryNode node, Object data) { + String line = formatProperties(node, "begin", node.getBegin(), "end", node.getEnd(), "field", node.getFieldAsString(), "text", node.getTextAsString()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(RegexpQueryNode node, Object data) { + String line = formatProperties(node, "field", node.getField(), "text", node.getText()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(SynonymQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + @Override + public Object visit(TermRangeQueryNode node, Object data) { + String line = formatProperties(node, "lowerInclusive", node.isLowerInclusive(), "upperInclusive", node.isUpperInclusive()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(WildcardQueryNode node, Object data) { + String line = formatProperties(node, "begin", node.getBegin(), "end", node.getEnd(), "field", node.getFieldAsString(), "text", node.getTextAsString()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(FunctionQueryNode node, Object data) { + String line = formatProperties(node, "begin", node.getBegin(), "end", node.getEnd(), "function", node.getFunction(), "parameters", + node.getParameterList()); + return writeLineAndVisitChildren(node, data, line); + } + + @Override + public Object visit(NotBooleanQueryNode node, Object data) { + return writeNameAndVisitChildren(node, data); + } + + private Object writeNameAndVisitChildren(QueryNode node, Object data) { + return writeLineAndVisitChildren(node, data, node.getClass().getSimpleName()); + } + + private Object writeLineAndVisitChildren(QueryNode node, Object data, String line) { + String prefix = (String) data; + output.writeLine(prefix + line); + prefix = prefix + PREFIX; + visitChildren(node, prefix); + return null; + } + + private String formatProperties(QueryNode node, 
Object... properties) { + int arrLength = properties.length; + if (arrLength % 2 == 1) { + throw new IllegalArgumentException("Properties array must consist of property name followed by property value"); + } + StringBuilder sb = new StringBuilder(); + sb.append(node.getClass().getSimpleName()); + for (int i = 0; i < arrLength; i += 2) { + sb.append(Constants.COMMA).append(Constants.SPACE).append(properties[i]).append(Constants.EQUALS).append(properties[(i + 1)]); + } + return sb.toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/QueryNodeType.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/QueryNodeType.java new file mode 100644 index 00000000000..14177a48715 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/QueryNodeType.java @@ -0,0 +1,115 @@ +package datawave.query.lucene.visitors; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.AnyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.DeletedQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FunctionQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchAllDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchNoDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NotBooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OpaqueQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PathQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PhraseSlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ProximityQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.AbstractRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.BooleanModifierNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.SynonymQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode; + +/** + * Encapsulates 
the various known concrete implementations of {@link QueryNode}. + */ +public enum QueryNodeType { + // @formatter:off + AND(AndQueryNode.class), + ANY(AnyQueryNode.class), + FIELD(FieldQueryNode.class), + BOOLEAN(BooleanQueryNode.class), + BOOST(BoostQueryNode.class), + DELETED(DeletedQueryNode.class), + FUZZY(FuzzyQueryNode.class), + GROUP(GroupQueryNode.class), + MATCH_ALL_DOCS(MatchAllDocsQueryNode.class), + MATCH_NO_DOCS(MatchNoDocsQueryNode.class), + MODIFIER(ModifierQueryNode.class), + NO_TOKEN_FOUND(NoTokenFoundQueryNode.class), + OPAQUE(OpaqueQueryNode.class), + OR(OrQueryNode.class), + PATH(PathQueryNode.class), + PHRASE_SLOP(PhraseSlopQueryNode.class), + PROXIMITY(ProximityQueryNode.class), + QUOTED_FIELD(QuotedFieldQueryNode.class), + SLOP(SlopQueryNode.class), + TOKENIZED_PHRASE(TokenizedPhraseQueryNode.class), + ABSTRACT_RANGE(AbstractRangeQueryNode.class), // Included because AbstractRangeQueryNode is not actually an abstract class. + BOOLEAN_MODIFIER(BooleanModifierNode.class), + MULTI_PHRASE(MultiPhraseQueryNode.class), + POINT(PointQueryNode.class), + POINT_RANGE(PointRangeQueryNode.class), + PREFIX_WILDCARD(PrefixWildcardQueryNode.class), + REGEX(RegexpQueryNode.class), + SYNONYM(SynonymQueryNode.class), + TERM_RANGE(TermRangeQueryNode.class), + WILDCARD(WildcardQueryNode.class), + FUNCTION(FunctionQueryNode.class), + NOT_BOOLEAN(NotBooleanQueryNode.class); + // @formatter:on + + private final String className; + + QueryNodeType(Class clazz) { + this.className = clazz.getName(); + } + + private static final Map ENUM_MAP; + static { + Map map = new HashMap<>(); + for (QueryNodeType type : QueryNodeType.values()) { + map.put(type.className, type); + } + ENUM_MAP = map; + } + + /** + * Returns the {@link QueryNodeType} for the given {@link QueryNode} class, or null if one does not exist. + * + * @param clazz + * the type + * @return the {@link QueryNodeType} + */ + public static QueryNodeType get(Class clazz) { + return get(clazz.getName()); + } + + /** + * Returns the {@link QueryNodeType} for the given class name, or null if one does not exist. + * + * @param className + * the class name + * @return the {@link QueryNodeType} + */ + public static QueryNodeType get(String className) { + return ENUM_MAP.get(className); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/UnescapedWildcardsInQuotedPhrasesVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/UnescapedWildcardsInQuotedPhrasesVisitor.java new file mode 100644 index 00000000000..e8b3a3028e8 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/UnescapedWildcardsInQuotedPhrasesVisitor.java @@ -0,0 +1,73 @@ +package datawave.query.lucene.visitors; + +import static datawave.query.Constants.ASTERISK_CHAR; +import static datawave.query.Constants.BACKSLASH_CHAR; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; + +/** + * A visitor that will check a LUCENE query for any unescaped wildcard characters in quoted phrases. + */ +public class UnescapedWildcardsInQuotedPhrasesVisitor extends BaseVisitor { + + /** + * Returns a copy of all {@link QuotedFieldQueryNode} nodes in the given tree that contain an unescaped wildcard in their phrase. 
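+ * For example, the phrase in {@code FIELD:"abc*def"} contains an unescaped wildcard and would be returned, while the phrase in {@code FIELD:"abc\*def"} would not.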
+ * + * @param query + * the query to examine + * @return the list of node copies + */ + public static List check(QueryNode query) { + UnescapedWildcardsInQuotedPhrasesVisitor visitor = new UnescapedWildcardsInQuotedPhrasesVisitor(); + return (List) visitor.visit(query, new ArrayList()); + } + + @Override + public Object visit(QuotedFieldQueryNode node, Object data) { + String text = node.getTextAsString(); + if (containsUnescapedWildcard(text)) { + ((List) data).add((QuotedFieldQueryNode) copy(node)); + } + return data; + } + + private boolean containsUnescapedWildcard(String text) { + if (text.isEmpty()) { + return false; + } + char[] chars = text.toCharArray(); + int totalChars = chars.length; + // Check whether the first character is a backslash. + boolean isPrevBackslash = false; + + // Examine each character in the string. + for (int currIndex = 0; currIndex < totalChars; currIndex++) { + char currChar = chars[currIndex]; + if (currChar == BACKSLASH_CHAR) { + // If the previous character was a blackslash, this is an escaped backslash. Reset the isPrevBacklash to false. + if (isPrevBackslash) { + isPrevBackslash = false; + } else { + // The current character is a backslash that escapes the next character. + isPrevBackslash = true; + } + } else if (currChar == ASTERISK_CHAR) { + // This is an escaped wildcard and can be ignored. Reset isPrevBackslash to false. + if (isPrevBackslash) { + isPrevBackslash = false; + } else { + return true; + } + } else { + isPrevBackslash = false; + } + } + + return false; + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/UnfieldedTermsVisitor.java b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/UnfieldedTermsVisitor.java new file mode 100644 index 00000000000..fe29c117e0f --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/lucene/visitors/UnfieldedTermsVisitor.java @@ -0,0 +1,34 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +/** + * A visitor implementation that will check a query for any unfielded terms, e.g. {@code "term1 term2 FIELD:term3"}. + */ +public class UnfieldedTermsVisitor extends BaseVisitor { + + /** + * Return any unfielded terms found within the query. + * + * @param query + * the query + * @return the list of unfielded terms + */ + public static List check(QueryNode query) { + UnfieldedTermsVisitor visitor = new UnfieldedTermsVisitor(); + return (List) visitor.visit(query, new ArrayList()); + } + + @Override + public Object visit(FieldQueryNode node, Object data) { + // Check if the term has a field. 
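+ // An empty field name indicates an unfielded term, e.g. the bare terms term1 and term2 in "term1 term2 FIELD:term3".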
+ if (node.getFieldAsString().isEmpty()) { + ((List) data).add(node.getTextAsString()); + } + return data; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java index 8fbebd477e8..cc64a1383f3 100644 --- a/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java +++ b/warehouse/query-core/src/main/java/datawave/query/planner/DefaultQueryPlanner.java @@ -159,6 +159,7 @@ import datawave.query.jexl.visitors.ValidateFilterFunctionVisitor; import datawave.query.jexl.visitors.order.OrderByCostVisitor; import datawave.query.jexl.visitors.whindex.WhindexVisitor; +import datawave.query.language.functions.jexl.Unique; import datawave.query.model.QueryModel; import datawave.query.planner.async.AbstractQueryPlannerCallable; import datawave.query.planner.async.FetchCompositeMetadata; @@ -182,6 +183,7 @@ import datawave.query.util.DateIndexHelper; import datawave.query.util.MetadataHelper; import datawave.query.util.QueryStopwatch; +import datawave.query.util.ShardQueryUtils; import datawave.query.util.Tuple2; import datawave.query.util.TypeMetadata; import datawave.util.time.TraceStopwatch; @@ -1377,7 +1379,7 @@ protected ASTJexlScript timedFixNegativeNumbers(QueryStopwatch timers, final AST protected ASTJexlScript timedUpperCaseIdentifiers(QueryStopwatch timers, final ASTJexlScript script, ShardQueryConfiguration config, MetadataHelper metadataHelper) throws DatawaveQueryException { - return visitorManager.timedVisit(timers, "Uppercase Field Names", () -> (upperCaseIdentifiers(metadataHelper, config, script))); + return visitorManager.timedVisit(timers, "Uppercase Field Names", () -> (ShardQueryUtils.upperCaseIdentifiers(metadataHelper, config, script))); } protected ASTJexlScript timedRewriteNegations(QueryStopwatch timers, final ASTJexlScript script) throws DatawaveQueryException { @@ -1834,135 +1836,22 @@ private void loadDataTypeMetadata(Multimap> fieldToDatatypeMap, S } } - protected Set upcase(Set fields) { - return fields.stream().map(s -> s.toUpperCase()).collect(Collectors.toSet()); - } - - protected ASTJexlScript upperCaseIdentifiers(MetadataHelper metadataHelper, ShardQueryConfiguration config, ASTJexlScript script) { - GroupFields groupFields = config.getGroupFields(); - if (groupFields != null && groupFields.hasGroupByFields()) { - groupFields.setMaxFields(upcase(groupFields.getMaxFields())); - groupFields.setSumFields(upcase(groupFields.getSumFields())); - groupFields.setGroupByFields(upcase(groupFields.getGroupByFields())); - groupFields.setAverageFields(upcase(groupFields.getAverageFields())); - groupFields.setCountFields(upcase(groupFields.getCountFields())); - groupFields.setMinFields(upcase(groupFields.getMinFields())); - - // If grouping is set, we must make the projection fields match all the group-by fields and aggregation fields. 
- config.setProjectFields(groupFields.getProjectionFields()); - } else { - Set projectFields = config.getProjectFields(); - - if (projectFields != null && !projectFields.isEmpty()) { - config.setProjectFields(upcase(projectFields)); - } - } - - UniqueFields uniqueFields = config.getUniqueFields(); - if (uniqueFields != null && !uniqueFields.isEmpty()) { - Sets.newHashSet(uniqueFields.getFields()).stream().forEach(s -> uniqueFields.replace(s, s.toUpperCase())); - } - - ExcerptFields excerptFields = config.getExcerptFields(); - if (excerptFields != null && !excerptFields.isEmpty()) { - Sets.newHashSet(excerptFields.getFields()).stream().forEach(s -> excerptFields.replace(s, s.toUpperCase())); - } - - Set userProjection = config.getRenameFields(); - if (userProjection != null && !userProjection.isEmpty()) { - config.setRenameFields(upcase(userProjection)); - } - - Set disallowlistedFields = config.getDisallowlistedFields(); - if (disallowlistedFields != null && !disallowlistedFields.isEmpty()) { - config.setDisallowlistedFields(upcase(disallowlistedFields)); - } - - Set limitFields = config.getLimitFields(); - if (limitFields != null && !limitFields.isEmpty()) { - config.setLimitFields(upcase(limitFields)); - } - - return (CaseSensitivityVisitor.upperCaseIdentifiers(config, metadataHelper, script)); - } - - // Overwrite projection and disallowlist properties if the query model is - // being used + /** + * Apply the query model to the given query script and query configuration, using the set of all fields cached in allFieldTypeMap if cacheDataTypes is true, + * or from {@link MetadataHelper#getAllFields(Set)} otherwise. + * + * @param metadataHelper + * the metadata helper + * @param config + * the query config + * @param script + * the query script + * @param queryModel + * the query model + * @return + */ protected ASTJexlScript applyQueryModel(MetadataHelper metadataHelper, ShardQueryConfiguration config, ASTJexlScript script, QueryModel queryModel) { - // generate the inverse of the reverse mapping; {display field name - // => db field name} - // a reverse mapping is always many to one, therefore the inverted - // reverse mapping - // can be one to many - Multimap inverseReverseModel = invertMultimap(queryModel.getReverseQueryMapping()); - - inverseReverseModel.putAll(queryModel.getForwardQueryMapping()); - Collection projectFields = config.getProjectFields(), disallowlistedFields = config.getDisallowlistedFields(), - limitFields = config.getLimitFields(); - - if (projectFields != null && !projectFields.isEmpty()) { - projectFields = queryModel.remapParameter(projectFields, inverseReverseModel); - if (log.isTraceEnabled()) { - log.trace("Updated projection set using query model to: " + projectFields); - } - config.setProjectFields(Sets.newHashSet(projectFields)); - } - - GroupFields groupFields = config.getGroupFields(); - if (groupFields != null && groupFields.hasGroupByFields()) { - groupFields.remapFields(inverseReverseModel, queryModel.getReverseQueryMapping()); - if (log.isTraceEnabled()) { - log.trace("Updating group-by fields using query model to " + groupFields); - } - config.setGroupFields(groupFields); - - // If grouping is set, we must make the projection fields match all the group-by fields and aggregation fields. 
- config.setProjectFields(groupFields.getProjectionFields()); - } - - UniqueFields uniqueFields = config.getUniqueFields(); - if (uniqueFields != null && !uniqueFields.isEmpty()) { - uniqueFields.remapFields(inverseReverseModel); - if (log.isTraceEnabled()) { - log.trace("Updated unique set using query model to: " + uniqueFields.getFields()); - } - config.setUniqueFields(uniqueFields); - } - - ExcerptFields excerptFields = config.getExcerptFields(); - if (excerptFields != null && !excerptFields.isEmpty()) { - excerptFields.expandFields(inverseReverseModel); - if (log.isTraceEnabled()) { - log.trace("Updated excerpt fields using query model to " + excerptFields.getFields()); - } - config.setExcerptFields(excerptFields); - } - - Set userProjection = config.getRenameFields(); - if (userProjection != null && !userProjection.isEmpty()) { - userProjection = Sets.newHashSet(queryModel.remapParameterEquation(userProjection, inverseReverseModel)); - if (log.isTraceEnabled()) { - log.trace("Updated user projection fields using query model to " + userProjection); - } - config.setRenameFields(userProjection); - } - - if (config.getDisallowlistedFields() != null && !config.getDisallowlistedFields().isEmpty()) { - disallowlistedFields = queryModel.remapParameter(disallowlistedFields, inverseReverseModel); - if (log.isTraceEnabled()) { - log.trace("Updated disallowlist set using query model to: " + disallowlistedFields); - } - config.setDisallowlistedFields(Sets.newHashSet(disallowlistedFields)); - } - - if (config.getLimitFields() != null && !config.getLimitFields().isEmpty()) { - limitFields = queryModel.remapParameterEquation(limitFields, inverseReverseModel); - if (log.isTraceEnabled()) { - log.trace("Updated limitFields set using query model to: " + limitFields); - } - config.setLimitFields(Sets.newHashSet(limitFields)); - } - + // Establish the set of all fields to use when applying the query model. 
Set dataTypes = config.getDatatypeFilter(); Set allFields = null; try { @@ -1972,8 +1861,9 @@ protected ASTJexlScript applyQueryModel(MetadataHelper metadataHelper, ShardQuer } if (null == allFields) { allFields = metadataHelper.getAllFields(dataTypes); - if (cacheDataTypes) + if (cacheDataTypes) { allFieldTypeMap.put(dataTypeHash, allFields); + } } if (log.isTraceEnabled()) { @@ -2001,8 +1891,7 @@ protected ASTJexlScript applyQueryModel(MetadataHelper metadataHelper, ShardQuer throw new DatawaveFatalQueryException(qe); } - return (QueryModelVisitor.applyModel(script, queryModel, allFields, config.getNoExpansionFields(), config.getLenientFields(), - config.getStrictFields())); + return ShardQueryUtils.applyQueryModel(script, config, allFields, queryModel); } /** @@ -2449,14 +2338,15 @@ protected IteratorSetting getQueryIterator(MetadataHelper metadataHelper, ShardQ boolean isPreload) throws DatawaveQueryException { if (null == settingFuture) settingFuture = loadQueryIterator(metadataHelper, config, isFullTable, isPreload); - if (settingFuture.isDone()) + if (settingFuture.isDone()) { try { return settingFuture.get(); } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e.getCause()); } - else + } else { return null; + } } public void configureTypeMappings(ShardQueryConfiguration config, IteratorSetting cfg, MetadataHelper metadataHelper, boolean compressMappings) diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/AbstractQueryRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/AbstractQueryRule.java new file mode 100644 index 00000000000..0238b5cebfe --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/AbstractQueryRule.java @@ -0,0 +1,50 @@ +package datawave.query.rules; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; +import java.util.StringJoiner; +import java.util.stream.Collectors; + +public abstract class AbstractQueryRule implements QueryRule { + + protected String name; + + @Override + public String getName() { + return name; + } + + @Override + public void setName(String name) { + this.name = name; + } + + public AbstractQueryRule() {} + + public AbstractQueryRule(String name) { + this.name = name; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + AbstractQueryRule that = (AbstractQueryRule) object; + return Objects.equals(name, that.name); + } + + @Override + public int hashCode() { + return Objects.hash(name); + } + + @Override + public String toString() { + return new StringJoiner(", ", getClass().getSimpleName() + "[", "]").add("name='" + name + "'").toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousNotRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousNotRule.java new file mode 100644 index 00000000000..8b04d22adda --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousNotRule.java @@ -0,0 +1,91 @@ +package datawave.query.rules; + +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; +import java.util.stream.Collectors; + +import org.apache.log4j.Logger; +import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NotBooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import 
org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; + +import datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; +import datawave.query.lucene.visitors.AmbigiousNotVisitor; +import datawave.query.lucene.visitors.LuceneQueryStringBuildingVisitor; + +/** + * An implementation of {@link QueryRule} that checks a LUCENE query for any usage of NOT with OR'd/AND'd terms before it that are not wrapped, e.g. * + * {@code FIELD1:abc OR FIELD2:def NOT FIELD3:123} should be {@code (FIELD1:abc OR FIELD2:def) NOT FIELD3:123}. + */ +public class AmbiguousNotRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(AmbiguousNotRule.class); + private static final EscapeQuerySyntax escapedSyntax = new EscapeQuerySyntaxImpl(); + + public AmbiguousNotRule() {} + + public AmbiguousNotRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.LUCENE; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + config); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + // Check the query for any ambiguous usage of NOT. + QueryNode luceneQuery = (QueryNode) config.getParsedQuery(); + List nodes = AmbigiousNotVisitor.check(luceneQuery); + // Add a message for each ambiguous NOT. + nodes.stream().map(this::formatMessage).forEach(result::addMessage); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + @Override + public QueryRule copy() { + return new AmbiguousNotRule(name); + } + + // Return a message about the given node. + private String formatMessage(NotBooleanQueryNode node) { + StringBuilder sb = new StringBuilder(); + sb.append("Ambiguous usage of NOT detected with multiple unwrapped preceding terms: "); + + String precedingTerms = getPrecedingTerms(node); + // @formatter:off + return sb.append("\"") + .append(precedingTerms) + .append(" NOT\" should be \"(") + .append(precedingTerms) + .append(") NOT\".") + .toString(); + // @formatter:on + } + + // Return the terms preceding the NOT in the given node as a nicely formatted query string. + private String getPrecedingTerms(NotBooleanQueryNode node) { + QueryNode junctionNode = node.getChildren().get(0); + String junction = junctionNode instanceof AndQueryNode ? 
" AND " : " OR "; + // @formatter:off + return junctionNode.getChildren().stream() + .map(child -> LuceneQueryStringBuildingVisitor.build(child)) + .collect(Collectors.joining(junction)); + // @formatter:on + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousOrPhrasesRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousOrPhrasesRule.java new file mode 100644 index 00000000000..7233edebb99 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousOrPhrasesRule.java @@ -0,0 +1,97 @@ +package datawave.query.rules; + +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import org.apache.log4j.Logger; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; + +import datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; +import datawave.query.lucene.visitors.AmbiguousUnfieldedTermsVisitor; +import datawave.query.lucene.visitors.BaseVisitor; +import datawave.query.lucene.visitors.LuceneQueryStringBuildingVisitor; + +/** + * An implementation of {@link QueryRule} that checks a LUCENE query for any fielded terms with unfielded terms directly ORed with it afterwards. For example: + *
      + *
+ * <ul> + * <li>{@code FOO:abc OR def} should be {@code FOO:(abc OR def)}</li> + * <li>{@code (FOO:abc OR def)} should be {@code FOO:(abc OR def)}</li> + * <li>{@code FOO:abc OR (def OR ghi)} should be {@code FOO:(abc OR def OR ghi)}</li> + * </ul>
    + */ +public class AmbiguousOrPhrasesRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(AmbiguousOrPhrasesRule.class); + private static final EscapeQuerySyntax escapedSyntax = new EscapeQuerySyntaxImpl(); + + public AmbiguousOrPhrasesRule() {} + + public AmbiguousOrPhrasesRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.LUCENE; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + config); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + // Check the query for any ambiguous OR'd unfielded phrases. + QueryNode luceneQuery = (QueryNode) config.getParsedQuery(); + List nodes = AmbiguousUnfieldedTermsVisitor.check(luceneQuery, AmbiguousUnfieldedTermsVisitor.JUNCTION.OR); + // Add a message for each ambiguous node. + nodes.stream().map(this::formatMessage).forEach(result::addMessage); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + @Override + public QueryRule copy() { + return new AmbiguousOrPhrasesRule(name); + } + + // Returns a formatted message for the node. + private String formatMessage(QueryNode node) { + // @formatter:off + return new StringBuilder() + .append("Ambiguous unfielded terms OR'd with fielded term detected: ") + .append(LuceneQueryStringBuildingVisitor.build(node)) + .append(" Recommended: ") + .append(CorrectFormatVisitor.format(node)) + .toString(); + // @formatter:on + } + + private static class CorrectFormatVisitor extends BaseVisitor { + + private static String format(QueryNode node) { + CorrectFormatVisitor visitor = new CorrectFormatVisitor(); + return ((StringBuilder) visitor.visit(node, new StringBuilder())).append(")").toString(); + } + + @Override + public Object visit(FieldQueryNode node, Object data) { + String field = node.getFieldAsString(); + if (field.isEmpty()) { + ((StringBuilder) data).append(" OR " + node.getTextAsString()); + } else { + ((StringBuilder) data).append(field).append(":(").append(node.getTextAsString()); + } + return data; + } + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousUnquotedPhrasesRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousUnquotedPhrasesRule.java new file mode 100644 index 00000000000..9408ff7ebd7 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/AmbiguousUnquotedPhrasesRule.java @@ -0,0 +1,96 @@ +package datawave.query.rules; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import org.apache.log4j.Logger; +import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; + +import datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; +import datawave.query.lucene.visitors.AmbiguousUnfieldedTermsVisitor; +import 
datawave.query.lucene.visitors.BaseVisitor; +import datawave.query.lucene.visitors.LuceneQueryStringBuildingVisitor; +import datawave.query.lucene.visitors.QueryNodeType; + +/** + * An implementation of {@link QueryRule} that checks a LUCENE query for any unquoted phrases that are implicitly ANDED with a preceding fielded terms, e.g. + * {@code FOO:term1 term2 term3} should be {@code FOO:"term1 term2 term3"}. + */ +public class AmbiguousUnquotedPhrasesRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(AmbiguousUnquotedPhrasesRule.class); + private static final EscapeQuerySyntax escapedSyntax = new EscapeQuerySyntaxImpl(); + + public AmbiguousUnquotedPhrasesRule() {} + + public AmbiguousUnquotedPhrasesRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.LUCENE; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + config); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + QueryNode luceneQuery = (QueryNode) config.getParsedQuery(); + List nodes = AmbiguousUnfieldedTermsVisitor.check(luceneQuery, AmbiguousUnfieldedTermsVisitor.JUNCTION.AND); + nodes.stream().map(this::formatMessage).forEach(result::addMessage); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + @Override + public QueryRule copy() { + return new AmbiguousUnquotedPhrasesRule(name); + } + + // Return a message about the given nodes. + private String formatMessage(QueryNode node) { + // @formatter:off + return new StringBuilder() + .append("Ambiguous unfielded terms AND'd with fielded term detected: ") + .append(LuceneQueryStringBuildingVisitor.build(node)) + .append(". 
Recommended: ") + .append(CorrectFormatVisitor.format(node)) + .toString(); + // @formatter:on + } + + private static class CorrectFormatVisitor extends BaseVisitor { + + private static String format(QueryNode node) { + CorrectFormatVisitor visitor = new CorrectFormatVisitor(); + return ((StringBuilder) visitor.visit(node, new StringBuilder())).append("\"").toString(); + } + + @Override + public Object visit(FieldQueryNode node, Object data) { + String field = node.getFieldAsString(); + if (field.isEmpty()) { + ((StringBuilder) data).append(" " + node.getTextAsString()); + } else { + ((StringBuilder) data).append(field).append(":\"").append(node.getTextAsString()); + } + return data; + } + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/FieldExistenceRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/FieldExistenceRule.java new file mode 100644 index 00000000000..dd21328e50e --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/FieldExistenceRule.java @@ -0,0 +1,122 @@ +package datawave.query.rules; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; +import java.util.StringJoiner; +import java.util.stream.Collectors; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.log4j.Logger; + +import datawave.query.jexl.visitors.FieldMissingFromSchemaVisitor; + +/** + * A {@link QueryRule} implementation that will check a query for any non-existent fields, i.e. not present in the data dictionary. + */ +public class FieldExistenceRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(FieldExistenceRule.class); + + private Set specialFields = Collections.emptySet(); + + public FieldExistenceRule() {} + + public FieldExistenceRule(String name) { + super(name); + } + + public FieldExistenceRule(String name, Set specialFields) { + super(name); + this.specialFields = specialFields == null ? Set.of() : Set.copyOf(specialFields); + } + + /** + * Returns the set of fields that will not trigger a notice if they are present in the query and not in the data dictionary. + */ + public Set getSpecialFields() { + return specialFields; + } + + /** + * Sets the fields that will not trigger a notice if they are present in the query and not in the data dictionary. If the given collection is null, an empty + * set will be used. Otherwise, the given collection will be copied, trimmed, and capitalized. + * + * @param specialFields + * the field exceptions + */ + public void setSpecialFields(Set specialFields) { + if (specialFields == null) { + this.specialFields = Collections.emptySet(); + } else { + // @formatter:off + // Ensure any configured fields are trimmed and capitalized. + this.specialFields = specialFields.stream() + .map(String::trim) + .map(String::toUpperCase) + .collect(Collectors.toUnmodifiableSet()); + // @formatter:on + } + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.JEXL; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration ruleConfiguration) throws Exception { + ShardQueryValidationConfiguration ruleConfig = (ShardQueryValidationConfiguration) ruleConfiguration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + ruleConfig); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + // Fetch the set of non-existent fields. 
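As an illustration of how this rule might be wired up, a small sketch follows. The field names, query, and helper method name are hypothetical, and JexlASTHelper.parseJexlQuery is assumed to be the usual way a JEXL script is obtained; the MetadataHelper is assumed to come from the query logic.

    static QueryRuleResult checkFieldExistence(MetadataHelper metadataHelper) throws Exception {
        // Special fields are trimmed and uppercased by the setter and are exempt from the dictionary check.
        FieldExistenceRule rule = new FieldExistenceRule("fieldExistence");
        rule.setSpecialFields(Set.of(" _anyfield_ ", "any_metadata")); // stored as _ANYFIELD_, ANY_METADATA

        ShardQueryValidationConfiguration config = new ShardQueryValidationConfiguration();
        config.setParsedQuery(JexlASTHelper.parseJexlQuery("_ANYFIELD_ == 'a' && TYPO_FIELD == 'b'"));
        config.setMetadataHelper(metadataHelper);
        // TYPO_FIELD would be expected to be reported if it is not in the data dictionary.
        return rule.validate(config);
    }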
+ ASTJexlScript jexlQuery = (ASTJexlScript) ruleConfig.getParsedQuery(); + Set nonExistentFields = FieldMissingFromSchemaVisitor.getNonExistentFields(ruleConfig.getMetadataHelper(), jexlQuery, + Collections.emptySet(), getSpecialFields()); + // If any non-existent fields were found, add them to the result. + if (!nonExistentFields.isEmpty()) { + result.addMessage("Fields not found in data dictionary: " + String.join(", ", nonExistentFields)); + } + } catch (Exception e) { + // If an exception occurred, log and preserve it in the result. + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + @Override + public QueryRule copy() { + return new FieldExistenceRule(name, specialFields); + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + if (!super.equals(object)) { + return false; + } + FieldExistenceRule rule = (FieldExistenceRule) object; + return Objects.equals(specialFields, rule.specialFields); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), specialFields); + } + + @Override + public String toString() { + return new StringJoiner(", ", FieldExistenceRule.class.getSimpleName() + "[", "]").add("name='" + name + "'").add("specialFields=" + specialFields) + .toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/FieldPatternPresenceRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/FieldPatternPresenceRule.java new file mode 100644 index 00000000000..434cbe96bab --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/FieldPatternPresenceRule.java @@ -0,0 +1,160 @@ +package datawave.query.rules; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.StringJoiner; +import java.util.stream.Collectors; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.log4j.Logger; + +import com.google.common.collect.Sets; + +import datawave.query.jexl.visitors.QueryFieldsVisitor; +import datawave.query.jexl.visitors.QueryPatternsVisitor; + +/** + * A {@link QueryRule} implementation that will check return configured messages if any of the configured fields or regex patterns are seen. + */ +public class FieldPatternPresenceRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(FieldPatternPresenceRule.class); + + private Map fieldMessages = Collections.emptyMap(); + private Map patternMessages = Collections.emptyMap(); + + public FieldPatternPresenceRule() {} + + public FieldPatternPresenceRule(String name) { + super(name); + } + + public FieldPatternPresenceRule(String name, Map fieldMessages, Map patternMessages) { + super(name); + this.fieldMessages = fieldMessages == null ? Map.of() : Map.copyOf(fieldMessages); + this.patternMessages = patternMessages == null ? Map.of() : Map.copyOf(patternMessages); + } + + /** + * Return the map of fields to messages + * + * @return the field message map + */ + public Map getFieldMessages() { + return fieldMessages; + } + + /** + * Sets the map of fields to messages that should be returned if the field keys are seen in a query. If the map is null, an empty map will be set, otherwise + * the map will be copied and each field key will be trimmed and capitalized. 
+ * + * @param fieldMessages + * the field messages + */ + public void setFieldMessages(Map fieldMessages) { + if (fieldMessages == null) { + this.fieldMessages = Collections.emptyMap(); + } else { + // @formatter:off + // Ensure any configured field keys are trimmed and uppercased. + this.fieldMessages = fieldMessages.entrySet().stream() + .collect(Collectors.toUnmodifiableMap( + entry -> entry.getKey().trim().toUpperCase(), + entry -> entry.getValue())); + // @formatter:on + } + } + + public Map getPatternMessages() { + return patternMessages; + } + + /** + * Sets the map of regex patterns to messages that should be returned if the patterns are seen in a query. + * + * @param patternMessages + * the pattern messages + */ + public void setPatternMessages(Map patternMessages) { + if (patternMessages == null) { + this.patternMessages = Collections.emptyMap(); + } else { + this.patternMessages = Collections.unmodifiableMap(patternMessages); + } + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.JEXL; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration ruleConfiguration) throws Exception { + ShardQueryValidationConfiguration ruleConfig = (ShardQueryValidationConfiguration) ruleConfiguration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + ruleConfig); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + ASTJexlScript jexlScript = (ASTJexlScript) ruleConfig.getParsedQuery(); + // Fetch the set of fields if any field presence messages were configured. + if (!fieldMessages.isEmpty()) { + Set fields = QueryFieldsVisitor.parseQueryFields(jexlScript, ruleConfig.getMetadataHelper()); + // @formatter:off + Sets.intersection(fieldMessages.keySet(), fields).stream() // Get the matching fields. + .map(fieldMessages::get) // Fetch their associated message. + .forEach(result::addMessage); // Add the message to the result. + // @formatter:on + } + + // Fetch the set of patterns if any pattern messages were configured. + if (!patternMessages.isEmpty()) { + Set patterns = QueryPatternsVisitor.findPatterns(jexlScript); + // @formatter:off + Sets.intersection(patternMessages.keySet(), patterns).stream() // Get the matching patterns. + .map(patternMessages::get) // Fetch their associated message. + .forEach(result::addMessage); // Add the message to the result. 
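A possible configuration sketch for this rule. The field key _ANYFIELD_, the pattern, and the messages are made-up examples, not values taken from the patch.

    FieldPatternPresenceRule rule = new FieldPatternPresenceRule("presence");
    rule.setFieldMessages(Map.of("_ANYFIELD_", "Unfielded terms force a broad index scan; consider specifying a field."));
    rule.setPatternMessages(Map.of(".*", "A match-everything pattern (.*) is not permitted."));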
+ // @formatter:on + } + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + + return result; + } + + @Override + public QueryRule copy() { + return new FieldPatternPresenceRule(name, fieldMessages, patternMessages); + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + if (!super.equals(object)) { + return false; + } + FieldPatternPresenceRule rule = (FieldPatternPresenceRule) object; + return Objects.equals(fieldMessages, rule.fieldMessages) && Objects.equals(patternMessages, rule.patternMessages); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), fieldMessages, patternMessages); + } + + @Override + public String toString() { + return new StringJoiner(", ", FieldPatternPresenceRule.class.getSimpleName() + "[", "]").add("name='" + name + "'") + .add("fieldMessages=" + fieldMessages).add("patternMessages=" + patternMessages).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/IncludeExcludeArgsRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/IncludeExcludeArgsRule.java new file mode 100644 index 00000000000..f42261cf4f0 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/IncludeExcludeArgsRule.java @@ -0,0 +1,104 @@ +package datawave.query.rules; + +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import org.apache.log4j.Logger; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +import datawave.query.lucene.visitors.InvalidIncludeExcludeArgsVisitor; + +/** + * A {@link QueryRule} implementation that validates the arguments of any {@code #INCLUDE} or {@link #EXCLUDE} functions found within a LUCENE query. + */ +public class IncludeExcludeArgsRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(IncludeExcludeArgsRule.class); + + private static final String NO_ARGS_MESSAGE = "Function #%s supplied with no arguments. Must supply at least a field and value, e.g. #%s(FIELD, 'value')."; + private static final String UNEVEN_ARGS_MESSAGE = "Function #%s supplied with uneven number of arguments. Must supply field/value pairs, e.g. " + + "#%s(FIELD, 'value') or #%s(FIELD1, 'value1', FIELD2, 'value2')."; + private static final String NO_ARGS_AFTER_BOOLEAN_MESSAGE = "Function #%s supplied with no arguments after the first boolean arg %s. " + + "Must supply at least a field and value after the first boolean arg, e.g. #%s(%s, FIELD, 'value')."; + private static final String UNEVEN_ARGS_AFTER_BOOLEAN_MESSAGE = "Function #%s supplied with uneven number of arguments after the first boolean arg %s. " + + "Must supply field/value after the boolean, e.g. 
#%s(%s, FIELD, 'value') or #%s(%s, FIELD1, 'value1',' FIELD2, 'value2')."; + + public IncludeExcludeArgsRule() {} + + public IncludeExcludeArgsRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.LUCENE; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + config); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + // Check the query for any #INCLUDE or #EXCLUDE functions with invalid arguments. + QueryNode luceneQuery = (QueryNode) config.getParsedQuery(); + List functions = InvalidIncludeExcludeArgsVisitor.check(luceneQuery); + functions.stream().map(this::formatMessage).forEach(result::addMessage); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + @Override + public QueryRule copy() { + return new IncludeExcludeArgsRule(name); + } + + // Return a formatted message that is specific to the reason. + private String formatMessage(InvalidIncludeExcludeArgsVisitor.InvalidFunction function) { + switch (function.getReason()) { + case NO_ARGS: + return formatNoArgsMessage(function); + case UNEVEN_ARGS: + return formatUnevenArgsMessage(function); + case NO_ARGS_AFTER_BOOLEAN: + return formatNoArgsAfterBooleanMessage(function); + case UNEVEN_ARGS_AFTER_BOOLEAN: + return formatUnevenArgsAfterBooleanMessage(function); + default: + throw new IllegalArgumentException("No message configured for scenario " + function.getReason()); + } + } + + // Return a formatted message for a no-args scenario. + private String formatNoArgsMessage(InvalidIncludeExcludeArgsVisitor.InvalidFunction function) { + String name = function.getName(); + return String.format(NO_ARGS_MESSAGE, name, name); + } + + // Return a formatted messge for an uneven args scenario. + private String formatUnevenArgsMessage(InvalidIncludeExcludeArgsVisitor.InvalidFunction function) { + String name = function.getName(); + return String.format(UNEVEN_ARGS_MESSAGE, name, name, name); + } + + // Return a formatted messge for a no args after a boolean scenario. + private String formatNoArgsAfterBooleanMessage(InvalidIncludeExcludeArgsVisitor.InvalidFunction function) { + String name = function.getName(); + String booleanArg = function.getArgs().get(0); + return String.format(NO_ARGS_AFTER_BOOLEAN_MESSAGE, name, booleanArg, name, booleanArg); + } + + // Returns a formatted message for an uneven args after a boolean scenario. 
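To make the format strings above concrete, this is the kind of message the no-args case produces. The format string is the NO_ARGS_MESSAGE constant defined above; #INCLUDE is just an example function name.

    String msg = String.format(
            "Function #%s supplied with no arguments. Must supply at least a field and value, e.g. #%s(FIELD, 'value').",
            "INCLUDE", "INCLUDE");
    // -> Function #INCLUDE supplied with no arguments. Must supply at least a field and value, e.g. #INCLUDE(FIELD, 'value').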
+ private String formatUnevenArgsAfterBooleanMessage(InvalidIncludeExcludeArgsVisitor.InvalidFunction function) { + String name = function.getName(); + String booleanArg = function.getArgs().get(0); + return String.format(UNEVEN_ARGS_AFTER_BOOLEAN_MESSAGE, name, booleanArg, name, booleanArg, name, booleanArg); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/IncludeExcludeIndexFieldsRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/IncludeExcludeIndexFieldsRule.java new file mode 100644 index 00000000000..422411eedfd --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/IncludeExcludeIndexFieldsRule.java @@ -0,0 +1,80 @@ +package datawave.query.rules; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; +import java.util.StringJoiner; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.log4j.Logger; + +import com.google.common.collect.Sets; + +import datawave.query.jexl.functions.EvaluationPhaseFilterFunctions; +import datawave.query.jexl.functions.EvaluationPhaseFilterFunctionsDescriptor; +import datawave.query.jexl.visitors.FetchFunctionFieldsVisitor; +import datawave.query.util.MetadataHelper; + +/** + * A {@link QueryRule} implementation that will check if any indexed fields are used within the functions {@code filter:includeRegex} or + * {@code filter:excludeRegex} in a query. + */ +public class IncludeExcludeIndexFieldsRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(IncludeExcludeIndexFieldsRule.class); + + private static final Set> functions = Collections.unmodifiableSet(Sets.newHashSet( + Pair.of(EvaluationPhaseFilterFunctions.EVAL_PHASE_FUNCTION_NAMESPACE, EvaluationPhaseFilterFunctionsDescriptor.INCLUDE_REGEX), + Pair.of(EvaluationPhaseFilterFunctions.EVAL_PHASE_FUNCTION_NAMESPACE, EvaluationPhaseFilterFunctionsDescriptor.EXCLUDE_REGEX))); + + public IncludeExcludeIndexFieldsRule() {} + + public IncludeExcludeIndexFieldsRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.JEXL; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration ruleConfiguration) throws Exception { + ShardQueryValidationConfiguration ruleConfig = (ShardQueryValidationConfiguration) ruleConfiguration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + ruleConfig); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + MetadataHelper metadataHelper = ruleConfig.getMetadataHelper(); + ASTJexlScript jexlScript = (ASTJexlScript) ruleConfig.getParsedQuery(); + // Fetch the set of fields given within any filter:includeRegex or filter:excludeRegex function calls in the query, if any. + Set functions = FetchFunctionFieldsVisitor.fetchFields(jexlScript, + IncludeExcludeIndexFieldsRule.functions, metadataHelper); + if (!functions.isEmpty()) { + Set indexedFields = metadataHelper.getIndexedFields(null); + // Each FunctionField object represents the collection of all fields seen for either filter:includeRegex or filter:excludeRegex. + for (FetchFunctionFieldsVisitor.FunctionFields functionFields : functions) { + Set intersection = Sets.intersection(indexedFields, functionFields.getFields()); + // If the function contains any index fields, add a message to the result. 
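A minimal sketch of the core overlap check this rule performs, using Guava's Sets.intersection; the field names are invented for illustration.

    Set<String> indexedFields = Set.of("NAME", "CITY");
    Set<String> regexFunctionFields = Set.of("CITY", "NOTES");
    Set<String> flagged = Sets.intersection(indexedFields, regexFunctionFields); // [CITY] would be reported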
+ if (!intersection.isEmpty()) { + result.addMessage("Indexed fields found within the function " + functionFields.getNamespace() + ":" + functionFields.getFunction() + + ": " + String.join(", ", intersection)); + } + } + } + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + + return result; + } + + @Override + public QueryRule copy() { + return new IncludeExcludeIndexFieldsRule(name); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/InvalidQuoteRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/InvalidQuoteRule.java new file mode 100644 index 00000000000..3ddd4f3b632 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/InvalidQuoteRule.java @@ -0,0 +1,67 @@ +package datawave.query.rules; + +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import org.apache.log4j.Logger; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +import datawave.query.lucene.visitors.InvalidQuoteVisitor; +import datawave.query.lucene.visitors.LuceneQueryStringBuildingVisitor; + +/** + * A {@link QueryRule} implementation that will check a LUCENE query for any instances of ` instead of ' being used to quote a phrase. + */ +public class InvalidQuoteRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(InvalidQuoteRule.class); + + public InvalidQuoteRule() {} + + public InvalidQuoteRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.LUCENE; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + config); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + // Check the query for any phrases with invalid quotes. + QueryNode luceneQuery = (QueryNode) config.getParsedQuery(); + List phrases = InvalidQuoteVisitor.check(luceneQuery); + // If any phrases with invalid quotes were found, add a notice about them. + phrases.stream().map(this::formatMessage).forEach(result::addMessage); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + @Override + public QueryRule copy() { + return new InvalidQuoteRule(name); + } + + private String formatMessage(QueryNode node) { + // @formatter:off + return new StringBuilder("Invalid quote ") + .append(InvalidQuoteVisitor.INVALID_QUOTE) + .append(" used in phrase \"") + .append(LuceneQueryStringBuildingVisitor.build(node)) + .append("\". 
Use ' instead.") + .toString(); + // @formatter:on + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/MinimumSlopProximityRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/MinimumSlopProximityRule.java new file mode 100644 index 00000000000..1add5843a65 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/MinimumSlopProximityRule.java @@ -0,0 +1,70 @@ +package datawave.query.rules; + +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import org.apache.log4j.Logger; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; + +import datawave.query.lucene.visitors.InvalidSlopProximityVisitor; +import datawave.query.lucene.visitors.LuceneQueryStringBuildingVisitor; + +/** + * A {@link QueryRule} implementation that will check for any slop phrases where the number is smaller than the number of terms, e.g. + * {@code FIELD:\"term1 term2 term3\"~1} where the 1 should be 3 or greater. + */ +public class MinimumSlopProximityRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(MinimumSlopProximityRule.class); + + public MinimumSlopProximityRule() {} + + public MinimumSlopProximityRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.LUCENE; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + config); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + // Check the query for slop phrases with invalid proximity numbers. 
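A worked example of the slop check described above: the proximity value must be at least the number of terms in the quoted phrase.

    // FIELD:"term1 term2 term3"~1 -> 3 terms but slop 1, so the rule reports "should be 3 or greater"
    // FIELD:"term1 term2 term3"~3 -> slop equals the term count, so nothing is reported
    int termCount = 3;
    int slop = 1;
    boolean invalid = slop < termCount; // true -> flagged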
+ QueryNode luceneQuery = (QueryNode) config.getParsedQuery(); + List invalidSlops = InvalidSlopProximityVisitor.check(luceneQuery); + invalidSlops.stream().map(this::formatMessage).forEach(result::addMessage); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + private String formatMessage(InvalidSlopProximityVisitor.InvalidSlop invalidSlop) { + SlopQueryNode node = invalidSlop.getNode(); + // @formatter:off + return new StringBuilder().append("Invalid slop proximity, the ") + .append(node.getValue()) + .append(" should be ") + .append(invalidSlop.getMinimum()) + .append(" or greater: ") + .append(LuceneQueryStringBuildingVisitor.build(node)) + .toString(); + // @formatter:on + } + + @Override + public QueryRule copy() { + return new MinimumSlopProximityRule(name); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/NumericValueRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/NumericValueRule.java new file mode 100644 index 00000000000..3b29f368016 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/NumericValueRule.java @@ -0,0 +1,83 @@ +package datawave.query.rules; + +import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Set; +import java.util.StringJoiner; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.log4j.Logger; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; + +import datawave.data.type.NumberType; +import datawave.query.jexl.visitors.FieldsWithNumericValuesVisitor; +import datawave.query.util.TypeMetadata; + +/** + * Implementation of {@link QueryRule} that will verify that fields with numeric values are actually numeric fields. + */ +public class NumericValueRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(NumericValueRule.class); + + private static final String NUMBER_TYPE = NumberType.class.getName(); + + public NumericValueRule() {} + + public NumericValueRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.JEXL; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration ruleConfiguration) throws Exception { + ShardQueryValidationConfiguration ruleConfig = (ShardQueryValidationConfiguration) ruleConfiguration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + ruleConfig); + } + QueryRuleResult result = new QueryRuleResult(getName()); + + try { + ASTJexlScript jexlScript = (ASTJexlScript) ruleConfig.getParsedQuery(); + // Fetch the set of fields that have numeric values. + Set fields = FieldsWithNumericValuesVisitor.getFields(jexlScript); + // If fields with numeric values were found, check the field types. + if (!fields.isEmpty()) { + // A temporary cache to avoid unecessary lookups via TypeMetadata if we see a field more than once. + Multimap types = HashMultimap.create(); + TypeMetadata typeMetadata = ruleConfig.getTypeMetadata(); + // Maintain insertion order. + Set nonNumericFields = new LinkedHashSet<>(); + // Find any fields that are not a number type. 
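A small sketch of the per-field type check this rule performs. The field names and the non-numeric type string are illustrative; NumberType is the DataWave numeric type the rule looks for.

    Multimap<String, String> types = HashMultimap.create();
    types.put("AGE", NumberType.class.getName());
    types.put("NAME", "datawave.data.type.LcNoDiacriticsType");
    boolean ageIsNumeric = types.containsEntry("AGE", NumberType.class.getName());   // true, not flagged
    boolean nameIsNumeric = types.containsEntry("NAME", NumberType.class.getName()); // false, flagged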
+ for (String field : fields) { + if (!types.containsKey(field)) { + types.putAll(field, typeMetadata.getNormalizerNamesForField(field)); + } + if (!types.containsEntry(field, NUMBER_TYPE)) { + nonNumericFields.add(field); + } + } + // If any non-numeric fields were specified with numeric values, add a message to the result. + if (!nonNumericFields.isEmpty()) { + result.addMessage("Numeric values supplied for non-numeric field(s): " + String.join(", ", nonNumericFields)); + } + } + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + + return result; + } + + @Override + public QueryRule copy() { + return new NumericValueRule(name); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/QueryRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/QueryRule.java new file mode 100644 index 00000000000..2f11d4187ff --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/QueryRule.java @@ -0,0 +1,47 @@ +package datawave.query.rules; + +public interface QueryRule { + + /** + * Return the name of this {@link QueryRule}. + * + * @return the name + */ + String getName(); + + /** + * Set the name of this {@link QueryRule}. + * + * @param name + * the name + */ + void setName(String name); + + /** + * Returns whether this {@link QueryRule} can validate its criteria against the given configuration via + * {@link QueryRule#validate(QueryValidationConfiguration)}. + * + * @param configuration + * the configuration + * @return true if this rule can validate the configuration, or false otherwise + */ + boolean canValidate(QueryValidationConfiguration configuration); + + /** + * Validates the given query against the criteria of this {@link QueryRule} and returns a list of messages detailing any issues. + * + * @param configuration + * the query validation configuration to use when validating the given query + * @return the details of any issues found within the query + * @throws Exception + * if any exception occurs + */ + QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception; + + /** + * Returns a copy of this {@link QueryRule}. + * + * @return the clone + */ + public QueryRule copy(); +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/QueryRuleResult.java b/warehouse/query-core/src/main/java/datawave/query/rules/QueryRuleResult.java new file mode 100644 index 00000000000..6f5ee472ab4 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/QueryRuleResult.java @@ -0,0 +1,90 @@ +package datawave.query.rules; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import datawave.query.Constants; + +public class QueryRuleResult { + + private final String ruleName; + private List messages; + private Exception exception; + + public static QueryRuleResult of(String ruleName, String... messages) { + QueryRuleResult result = new QueryRuleResult(ruleName); + for (String message : messages) { + result.addMessage(message); + } + return result; + } + + public static QueryRuleResult of(String ruleName, Exception exception, String... 
messages) { + QueryRuleResult result = of(ruleName, messages); + result.setException(exception); + return result; + } + + public QueryRuleResult(String ruleName) { + this.ruleName = ruleName; + this.messages = new ArrayList<>(); + } + + public String getRuleName() { + return ruleName; + } + + public List getMessages() { + return messages; + } + + public void addMessage(String message) { + this.messages.add(message); + } + + public void setException(Exception exception) { + this.exception = exception; + } + + public Exception getException() { + return exception; + } + + public boolean exceptionOccurred() { + return exception != null; + } + + public boolean hasMessageOrException() { + return exceptionOccurred() || !messages.isEmpty(); + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + QueryRuleResult that = (QueryRuleResult) object; + return Objects.equals(ruleName, that.ruleName) && Objects.equals(messages, that.messages) && Objects.equals(exception, that.exception); + } + + @Override + public int hashCode() { + return Objects.hash(ruleName, messages, exception); + } + + @Override + public String toString() { + return new StringJoiner(", ", QueryRuleResult.class.getSimpleName() + "[", "]").add("ruleName='" + ruleName + "'").add("messages=" + messages) + .add("exception=" + exception).toString(); + } + + public void addMessages(Collection messages) { + messages.forEach(this::addMessage); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationConfiguration.java new file mode 100644 index 00000000000..e0cdacbf834 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationConfiguration.java @@ -0,0 +1,22 @@ +package datawave.query.rules; + +import datawave.core.query.configuration.GenericQueryConfiguration; +import datawave.microservice.query.Query; + +public interface QueryValidationConfiguration { + + /** + * Returns the {@link Query} settings to be validated. + * + * @return the query settings + */ + Query getQuerySettings(); + + /** + * Returns the {@link GenericQueryConfiguration} for the query. + * + * @return the query configuration + */ + GenericQueryConfiguration getQueryConfiguration(); + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationConfigurationImpl.java b/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationConfigurationImpl.java new file mode 100644 index 00000000000..160303218bb --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationConfigurationImpl.java @@ -0,0 +1,50 @@ +package datawave.query.rules; + +import datawave.core.query.configuration.GenericQueryConfiguration; +import datawave.microservice.query.Query; + +/** + * Provides a baseline implementation of {@link QueryValidationConfiguration}. + */ +public class QueryValidationConfigurationImpl implements QueryValidationConfiguration { + + protected Query querySettings; + + protected GenericQueryConfiguration queryConfiguration; + + /** + * {@inheritDoc} + */ + @Override + public Query getQuerySettings() { + return querySettings; + } + + /** + * Sets the query settings to be validated. 
+ * + * @param query + * the query settings + */ + public void setQuerySettings(Query query) { + this.querySettings = query; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericQueryConfiguration getQueryConfiguration() { + return queryConfiguration; + } + + /** + * Sets the query configuration for the query. + * + * @param queryConfiguration + * the query configuration + */ + public void setQueryConfiguration(GenericQueryConfiguration queryConfiguration) { + this.queryConfiguration = queryConfiguration; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationResult.java b/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationResult.java new file mode 100644 index 00000000000..0587e89cd48 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/QueryValidationResult.java @@ -0,0 +1,57 @@ +package datawave.query.rules; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +public class QueryValidationResult { + + private List ruleResults = new ArrayList<>(); + + private Exception exception; + + public List getRuleResults() { + return ruleResults; + } + + public void addRuleResult(QueryRuleResult result) { + this.ruleResults.add(result); + } + + public void addResults(Collection results) { + this.ruleResults.addAll(results); + } + + public Exception getException() { + return exception; + } + + public void setException(Exception exception) { + this.exception = exception; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + QueryValidationResult that = (QueryValidationResult) object; + return Objects.equals(ruleResults, that.ruleResults) && Objects.equals(exception, that.exception); + } + + @Override + public int hashCode() { + return Objects.hash(ruleResults, exception); + } + + @Override + public String toString() { + return new StringJoiner(", ", QueryValidationResult.class.getSimpleName() + "[", "]").add("ruleResults=" + ruleResults).add("exception=" + exception) + .toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/ShardQueryRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/ShardQueryRule.java new file mode 100644 index 00000000000..cc5eec7041d --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/ShardQueryRule.java @@ -0,0 +1,48 @@ +package datawave.query.rules; + +import java.util.StringJoiner; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.JexlNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +public abstract class ShardQueryRule extends AbstractQueryRule { + + public ShardQueryRule() { + super(); + } + + public ShardQueryRule(String name) { + super(name); + } + + protected enum Syntax { + JEXL, LUCENE + } + + protected abstract Syntax getSupportedSyntax(); + + @Override + public boolean canValidate(QueryValidationConfiguration configuration) { + if (!(configuration instanceof ShardQueryValidationConfiguration)) { + return false; + } + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + Syntax syntax = getSupportedSyntax(); + if (syntax != null) { + Object query = config.getParsedQuery(); + if (query == null) { + return false; + } + switch (syntax) { + case JEXL: + 
return query instanceof JexlNode; + case LUCENE: + return query instanceof QueryNode; + default: + throw new IllegalArgumentException("Cannot determine support for syntax " + syntax); + } + } + return true; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/ShardQueryValidationConfiguration.java b/warehouse/query-core/src/main/java/datawave/query/rules/ShardQueryValidationConfiguration.java new file mode 100644 index 00000000000..2b6e6e097e8 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/ShardQueryValidationConfiguration.java @@ -0,0 +1,47 @@ +package datawave.query.rules; + +import datawave.query.util.MetadataHelper; +import datawave.query.util.TypeMetadata; + +/** + * Base implementation of {@link QueryValidationConfiguration} that is specific to shard queries. + */ +public class ShardQueryValidationConfiguration extends QueryValidationConfigurationImpl { + + protected MetadataHelper metadataHelper; + protected TypeMetadata typeMetadata; + protected Object parsedQuery; + protected String queryString; + + public MetadataHelper getMetadataHelper() { + return metadataHelper; + } + + public void setMetadataHelper(MetadataHelper metadataHelper) { + this.metadataHelper = metadataHelper; + } + + public TypeMetadata getTypeMetadata() { + return typeMetadata; + } + + public void setTypeMetadata(TypeMetadata typeMetadata) { + this.typeMetadata = typeMetadata; + } + + public Object getParsedQuery() { + return parsedQuery; + } + + public void setParsedQuery(Object parsedQuery) { + this.parsedQuery = parsedQuery; + } + + public String getQueryString() { + return queryString; + } + + public void setQueryString(String queryString) { + this.queryString = queryString; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/TimeFunctionRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/TimeFunctionRule.java new file mode 100644 index 00000000000..64ac694f7ff --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/TimeFunctionRule.java @@ -0,0 +1,94 @@ +package datawave.query.rules; + +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Set; +import java.util.StringJoiner; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.log4j.Logger; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +import datawave.data.type.DateType; +import datawave.query.jexl.functions.EvaluationPhaseFilterFunctions; +import datawave.query.jexl.functions.EvaluationPhaseFilterFunctionsDescriptor; +import datawave.query.jexl.visitors.FetchFunctionFieldsVisitor; +import datawave.query.util.TypeMetadata; + +/** + * A {@link QueryRule} implementation that will check if any time-based functions are used with non-date fields in a query. 
+ */ +public class TimeFunctionRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(TimeFunctionRule.class); + + private static final Set> functions = Collections.unmodifiableSet(Sets + .newHashSet(Pair.of(EvaluationPhaseFilterFunctions.EVAL_PHASE_FUNCTION_NAMESPACE, EvaluationPhaseFilterFunctionsDescriptor.TIME_FUNCTION))); + + private static final String DATE_TYPE = DateType.class.getName(); + + public TimeFunctionRule() {} + + public TimeFunctionRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.JEXL; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration ruleConfiguration) throws Exception { + ShardQueryValidationConfiguration ruleConfig = (ShardQueryValidationConfiguration) ruleConfiguration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + ruleConfig); + } + QueryRuleResult result = new QueryRuleResult(getName()); + + try { + ASTJexlScript jexlScript = (ASTJexlScript) ruleConfig.getParsedQuery(); + // Fetch the set of fields used in all of the time functions. + Set functions = FetchFunctionFieldsVisitor.fetchFields(jexlScript, TimeFunctionRule.functions, + ruleConfig.getMetadataHelper()); + // If any time functions were used in the query, check the field types. + if (!functions.isEmpty()) { + TypeMetadata typeMetadata = ruleConfig.getTypeMetadata(); + // A temporary cache to avoid unecessary lookups via TypeMetadata if we see a field more than once. + Multimap types = HashMultimap.create(); + for (FetchFunctionFieldsVisitor.FunctionFields functionFields : functions) { + // Find any fields that are not a date type. Maintain insertion order. + Set invalidFields = new LinkedHashSet<>(); + for (String field : functionFields.getFields()) { + if (!types.containsKey(field)) { + types.putAll(field, typeMetadata.getNormalizerNamesForField(field)); + } + if (!types.containsEntry(field, DATE_TYPE)) { + invalidFields.add(field); + } + } + // If any non-date type fields were found, add a message to the result. 
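+                    // Example message (field names illustrative): "Function #TIME_FUNCTION (filter:timeFunction) found with
+                    // fields that are not date types: BIRTH_DATE"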
+ if (!invalidFields.isEmpty()) { + result.addMessage("Function #TIME_FUNCTION (filter:timeFunction) found with fields that are not date types: " + + String.join(", ", invalidFields)); + } + } + } + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + + return result; + } + + @Override + public QueryRule copy() { + return new TimeFunctionRule(name); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/UnescapedSpecialCharsRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/UnescapedSpecialCharsRule.java new file mode 100644 index 00000000000..788d7b58824 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/UnescapedSpecialCharsRule.java @@ -0,0 +1,221 @@ +package datawave.query.rules; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.StringJoiner; +import java.util.stream.Collectors; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.log4j.Logger; + +import com.google.common.collect.SetMultimap; + +import datawave.query.jexl.visitors.UnescapedSpecialCharactersVisitor; + +/** + * A {@link QueryRule} implementation that will check a query for any unescaped characters in literals or patterns in a JEXL query. + */ +public class UnescapedSpecialCharsRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(UnescapedSpecialCharsRule.class); + + private static final String REGEX_VALUE_DESCRIPTOR = "Regex pattern"; + private static final String LITERAL_VALUE_DESCRIPTOR = "Literal string"; + + private Set literalExceptions = new HashSet<>(); + private Set patternExceptions = new HashSet<>(); + private boolean escapedWhitespaceRequiredForLiterals = false; + private boolean escapedWhitespaceRequiredForPatterns = false; + + public UnescapedSpecialCharsRule() {} + + public UnescapedSpecialCharsRule(String name) { + super(name); + } + + public UnescapedSpecialCharsRule(String name, Set literalExceptions, Set patternExceptions, + boolean escapedWhitespaceRequiredForLiterals, boolean escapedWhitespaceRequiredForPatterns) { + super(name); + this.literalExceptions = literalExceptions == null ? Set.of() : Set.copyOf(literalExceptions); + this.patternExceptions = patternExceptions == null ? Set.of() : Set.copyOf(patternExceptions); + this.escapedWhitespaceRequiredForLiterals = escapedWhitespaceRequiredForLiterals; + this.escapedWhitespaceRequiredForPatterns = escapedWhitespaceRequiredForPatterns; + } + + /** + * Returns the set the characters that should not trigger a notice if they are present and unescaped in a literal. + * + * @return a set of characters + */ + public Set getLiteralExceptions() { + return literalExceptions; + } + + /** + * Sets the characters that should not trigger a notice if they are present and unescaped in a literal. + * + * @param literalExceptions + * the characters + */ + public void setLiteralExceptions(Set literalExceptions) { + if (literalExceptions == null) { + this.literalExceptions = Collections.emptySet(); + } else { + this.literalExceptions = Collections.unmodifiableSet(literalExceptions); + } + } + + /** + * Returns the set the characters that should not trigger a notice if they are present and unescaped in a pattern. 
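+     * Note that the reserved regex characters listed on {@link #setPatternExceptions(Set)} are always treated as exceptions, regardless of this set.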
+ * + * @return a set of characters + */ + public Set getPatternExceptions() { + return patternExceptions; + } + + /** + * Sets the characters that should not trigger a notice if they are present and unescaped in a pattern. Note that the following reserved regex characters + * will always be considered exceptions: {@code '.', '+', '*', '?', '^', '$', '(', ')', '[', ']', '{', '}', '|', '\\'}. + * + * @param literalSpecialCharExceptions + * the characters + */ + public void setPatternExceptions(Set patternExceptions) { + if (patternExceptions == null) { + this.patternExceptions = Collections.emptySet(); + } else { + this.patternExceptions = Collections.unmodifiableSet(patternExceptions); + } + } + + /** + * Return whether whitespace must be escaped in literals. + * + * @return true if whitespace characters must be escaped in literals, or false otherwise. + */ + public boolean isEscapedWhitespaceRequiredForLiterals() { + return escapedWhitespaceRequiredForLiterals; + } + + /** + * Sets whether whitespace must be escaped in literals. + * + * @param escapedWhitespaceRequiredForLiterals + * true if whitespace characters must be escaped in literals, or false otherwise. + */ + public void setEscapedWhitespaceRequiredForLiterals(boolean escapedWhitespaceRequiredForLiterals) { + this.escapedWhitespaceRequiredForLiterals = escapedWhitespaceRequiredForLiterals; + } + + /** + * Return whether whitespace must be escaped in regex patterns. + * + * @return true if whitespace characters must be escaped in regex patterns, or false otherwise. + */ + public boolean isEscapedWhitespaceRequiredForPatterns() { + return escapedWhitespaceRequiredForPatterns; + } + + /** + * Sets whether whitespace must be escaped in regex patterns. + * + * @param escapedWhitespaceRequiredForLiterals + * true if whitespace characters must be escaped in regex patterns, or false otherwise. + */ + public void setEscapedWhitespaceRequiredForPatterns(boolean escapedWhitespaceRequiredForPatterns) { + this.escapedWhitespaceRequiredForPatterns = escapedWhitespaceRequiredForPatterns; + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.JEXL; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration ruleConfig = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + ruleConfig); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + ASTJexlScript jexlScript = (ASTJexlScript) ruleConfig.getParsedQuery(); + // Check the query for unescaped characters. + UnescapedSpecialCharactersVisitor visitor = UnescapedSpecialCharactersVisitor.check(jexlScript, literalExceptions, + escapedWhitespaceRequiredForLiterals, patternExceptions, escapedWhitespaceRequiredForPatterns); + // Add messages for any unescaped special characters seen in literals or regex patterns. 
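+            // Each message has the form: <descriptor> "<value>" has the following unescaped special character(s): '<char>', ... (see getMessages below).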
+ result.addMessages(getMessages(visitor.getUnescapedCharactersInLiterals(), LITERAL_VALUE_DESCRIPTOR)); + result.addMessages(getMessages(visitor.getUnescapedCharactersInPatterns(), REGEX_VALUE_DESCRIPTOR)); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + + return result; + } + + @Override + public QueryRule copy() { + return new UnescapedSpecialCharsRule(name, literalExceptions, patternExceptions, escapedWhitespaceRequiredForLiterals, + escapedWhitespaceRequiredForPatterns); + } + + /** + * Returns a list of formatted messages regarding the given unescaped characters. + * + * @param unescapedCharacters + * the unescaped characters. + * @param valueDescriptor + * the value descriptor, i.e. whether we're looking at string literals or regex patterns. + * @return + */ + private List getMessages(SetMultimap unescapedCharacters, String valueDescriptor) { + List messages = new ArrayList<>(); + for (String key : unescapedCharacters.keySet()) { + Set characters = unescapedCharacters.get(key); + StringBuilder sb = new StringBuilder(); + sb.append(valueDescriptor).append(" \"").append(key).append("\" has the following unescaped special character(s): "); + String characterList = characters.stream().map(ch -> "'" + ch + "'").collect(Collectors.joining(", ")); + sb.append(characterList); + messages.add(sb.toString()); + } + return messages; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + if (!super.equals(object)) { + return false; + } + UnescapedSpecialCharsRule rule = (UnescapedSpecialCharsRule) object; + return escapedWhitespaceRequiredForLiterals == rule.escapedWhitespaceRequiredForLiterals + && escapedWhitespaceRequiredForPatterns == rule.escapedWhitespaceRequiredForPatterns + && Objects.equals(literalExceptions, rule.literalExceptions) && Objects.equals(patternExceptions, rule.patternExceptions); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), Set.copyOf(literalExceptions), Set.copyOf(patternExceptions), escapedWhitespaceRequiredForLiterals, + escapedWhitespaceRequiredForPatterns); + } + + @Override + public String toString() { + return new StringJoiner(", ", UnescapedSpecialCharsRule.class.getSimpleName() + "[", "]").add("name='" + name + "'") + .add("literalExceptions=" + literalExceptions).add("patternExceptions=" + patternExceptions) + .add("escapedWhitespaceRequiredForLiterals=" + escapedWhitespaceRequiredForLiterals) + .add("escapedWhitespaceRequiredForPatterns=" + escapedWhitespaceRequiredForPatterns).toString(); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/UnescapedWildcardsInPhrasesRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/UnescapedWildcardsInPhrasesRule.java new file mode 100644 index 00000000000..2da5e108aa2 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/UnescapedWildcardsInPhrasesRule.java @@ -0,0 +1,90 @@ +package datawave.query.rules; + +import java.util.List; + +import org.apache.log4j.Logger; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; + +import datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; +import 
datawave.query.lucene.visitors.BaseVisitor; +import datawave.query.lucene.visitors.UnescapedWildcardsInQuotedPhrasesVisitor; + +/** + * A {@link QueryRule} implementation that will check a LUCENE query for any unescaped wildcard characters in a quoted phrase. + */ +public class UnescapedWildcardsInPhrasesRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(UnescapedWildcardsInPhrasesRule.class); + private static final EscapeQuerySyntax escapeQuerySyntax = new EscapeQuerySyntaxImpl(); + + public UnescapedWildcardsInPhrasesRule() {} + + public UnescapedWildcardsInPhrasesRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.LUCENE; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + config); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + // Check the query for any phrases with unescaped wildcards. + QueryNode luceneQuery = (QueryNode) config.getParsedQuery(); + List nodes = UnescapedWildcardsInQuotedPhrasesVisitor.check(luceneQuery); + // If any phrases with unescaped wildcards were found, add a notice about them. + nodes.stream().map(this::getFormattedMessage).forEach(result::addMessage); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + @Override + public QueryRule copy() { + return new UnescapedWildcardsInPhrasesRule(name); + } + + // Return a formatted message for the given node. + private String getFormattedMessage(QuotedFieldQueryNode node) { + StringBuilder sb = new StringBuilder(); + sb.append("Unescaped wildcard found in phrase "); + sb.append(formatQueryNode(node, true)); + sb.append(". Wildcard is incorrect, or phrase should be "); + // Make a copy of the node with the suggested phrase format. + QuotedFieldQueryNode copy = (QuotedFieldQueryNode) BaseVisitor.copy(node); + copy.setText("/" + copy.getTextAsString() + "/"); + sb.append(formatQueryNode(copy, false)); + return sb.toString(); + } + + // Return a string representation of the given node. + private String formatQueryNode(QuotedFieldQueryNode node, boolean quoted) { + StringBuilder sb = new StringBuilder(); + String fieldName = node.getFieldAsString(); + if (!fieldName.isEmpty()) { + sb.append(fieldName).append(":"); + } + // If quoted is true, add quotes around the phrase. 
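+        // e.g. FIELD:"term*" when quoted, or FIELD:/term*/ for the suggested regex form built in getFormattedMessage (values illustrative).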
+ if (quoted) { + sb.append("\""); + } + sb.append(node.getTextAsString()); + if (quoted) { + sb.append("\""); + } + return sb.toString(); + } + +} diff --git a/warehouse/query-core/src/main/java/datawave/query/rules/UnfieldedTermsRule.java b/warehouse/query-core/src/main/java/datawave/query/rules/UnfieldedTermsRule.java new file mode 100644 index 00000000000..9aee615e4fc --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/rules/UnfieldedTermsRule.java @@ -0,0 +1,54 @@ +package datawave.query.rules; + +import java.util.List; +import java.util.StringJoiner; + +import org.apache.log4j.Logger; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; + +import datawave.query.lucene.visitors.UnfieldedTermsVisitor; + +/** + * A {@link QueryRule} implementation that will check a LUCENE query for any unfielded terms. + */ +public class UnfieldedTermsRule extends ShardQueryRule { + + private static final Logger log = Logger.getLogger(UnfieldedTermsRule.class); + + public UnfieldedTermsRule() {} + + public UnfieldedTermsRule(String name) { + super(name); + } + + @Override + protected Syntax getSupportedSyntax() { + return Syntax.LUCENE; + } + + @Override + public QueryRuleResult validate(QueryValidationConfiguration configuration) throws Exception { + ShardQueryValidationConfiguration config = (ShardQueryValidationConfiguration) configuration; + if (log.isDebugEnabled()) { + log.debug("Validating config against instance '" + getName() + "' of " + getClass() + ": " + config); + } + + QueryRuleResult result = new QueryRuleResult(getName()); + try { + // Check the query for any phrases with unfielded terms. + QueryNode luceneQuery = (QueryNode) config.getParsedQuery(); + List terms = UnfieldedTermsVisitor.check(luceneQuery); + // If any unfielded terms were found, add a notice about them. 
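+            // e.g. a LUCENE query of 'FIELD:abc OR def' would report: "Unfielded term def found." (example only).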
+ terms.forEach(term -> result.addMessage("Unfielded term " + term + " found.")); + } catch (Exception e) { + log.error("Error occurred when validating against instance '" + getName() + "' of " + getClass(), e); + result.setException(e); + } + return result; + } + + @Override + public QueryRule copy() { + return new UnfieldedTermsRule(name); + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java index 3a113c75a66..6f148ee48c1 100644 --- a/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java +++ b/warehouse/query-core/src/main/java/datawave/query/tables/ShardQueryLogic.java @@ -30,6 +30,7 @@ import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import org.apache.accumulo.core.security.Authorizations; +import org.apache.commons.collections4.Transformer; import org.apache.commons.jexl3.parser.JexlNode; import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.commons.lang3.StringUtils; @@ -78,15 +79,26 @@ import datawave.query.index.lookup.UidIntersector; import datawave.query.iterator.QueryOptions; import datawave.query.iterator.ivarator.IvaratorCacheDirConfig; +import datawave.query.jexl.JexlASTHelper; +import datawave.query.jexl.functions.QueryFunctions; +import datawave.query.jexl.visitors.InvertNodeVisitor; +import datawave.query.jexl.visitors.JexlStringBuildingVisitor; +import datawave.query.jexl.visitors.QueryOptionsFromQueryVisitor; +import datawave.query.jexl.visitors.TreeFlatteningRebuilder; import datawave.query.language.parser.ParseException; import datawave.query.language.parser.QueryParser; +import datawave.query.language.parser.lucene.LuceneSyntaxQueryParser; import datawave.query.language.tree.QueryNode; import datawave.query.model.QueryModel; import datawave.query.planner.DefaultQueryPlanner; import datawave.query.planner.FederatedQueryPlanner; import datawave.query.planner.MetadataHelperQueryModelProvider; import datawave.query.planner.QueryModelProvider; +import datawave.query.planner.QueryOptionsSwitch; import datawave.query.planner.QueryPlanner; +import datawave.query.rules.QueryRule; +import datawave.query.rules.QueryValidationResult; +import datawave.query.rules.ShardQueryValidationConfiguration; import datawave.query.scheduler.PushdownScheduler; import datawave.query.scheduler.Scheduler; import datawave.query.scheduler.SequentialScheduler; @@ -96,15 +108,19 @@ import datawave.query.transformer.EventQueryDataDecoratorTransformer; import datawave.query.transformer.FieldRenameTransform; import datawave.query.transformer.GroupingTransform; +import datawave.query.transformer.QueryValidationResultTransformer; import datawave.query.transformer.UniqueTransform; import datawave.query.util.DateIndexHelper; import datawave.query.util.DateIndexHelperFactory; import datawave.query.util.MetadataHelper; import datawave.query.util.MetadataHelperFactory; import datawave.query.util.QueryStopwatch; +import datawave.query.util.ShardQueryUtils; import datawave.util.time.TraceStopwatch; +import datawave.webservice.query.exception.DatawaveErrorCode; import datawave.webservice.query.exception.QueryException; import datawave.webservice.query.result.event.ResponseObjectFactory; +import datawave.webservice.result.QueryValidationResponse; /** *

 * <h1>Overview</h1>

    QueryTable implementation that works with the JEXL grammar. This QueryTable uses the DATAWAVE metadata, global index, and sharded event @@ -193,14 +209,15 @@ public class ShardQueryLogic extends BaseQueryLogic> implements protected Map configuredProfiles = Maps.newHashMap(); protected Profile selectedProfile = null; protected Map> primaryToSecondaryFieldMap = Collections.emptyMap(); + protected Transformer validationResponseTransformer = null; // Map of syntax names to QueryParser classes private Map querySyntaxParsers = new HashMap<>(); private Set mandatoryQuerySyntax = null; private QueryPlanner planner = null; private QueryParser parser = null; private QueryLogicTransformer transformerInstance = null; - private CardinalityConfiguration cardinalityConfiguration = null; + private List validationRules = null; /** * Basic constructor @@ -321,11 +338,74 @@ public String getJexlQueryString(Query settings) throws ParseException { String originalQuery = settings.getQuery(); originalQuery = this.expandQueryMacros(originalQuery); + if (originalQuery == null) { + throw new IllegalArgumentException("Query cannot be null"); + } - // Determine query syntax (i.e. JEXL, LUCENE, etc.) + // Determine the valid query syntax to use (i.e. JEXL, LUCENE, etc.) + String querySyntax = getValidQuerySyntax(settings); + + if (querySyntax.equals(Constants.JEXL)) { + return originalQuery; + } else { + QueryParser queryParser = getQueryParser(querySyntax); + QueryNode node = queryParser.parse(originalQuery); + String jexlQuery = node.getOriginalQuery(); + if (log.isTraceEnabled()) { + log.trace("luceneQueryString: " + originalQuery + " --> jexlQueryString: " + jexlQuery); + } + return jexlQuery; + } + } + + /** + * Returns the {@link QueryParser} that should be used to parse a query of the given syntax to JEXL. + * + * @param syntax + * the syntax + * @return the query parser + */ + private QueryParser getQueryParser(String syntax) { + if (this.querySyntaxParsers == null) { + throw new IllegalStateException("Query syntax parsers not configured"); + } + QueryParser queryParser = this.querySyntaxParsers.get(syntax); + if (queryParser == null) { + queryParser = getParser(); + if (queryParser == null) { + throw new IllegalArgumentException("QueryParser not configured for syntax: " + syntax); + } + } + return queryParser; + } + + /** + * Returns the query syntax that should be used when parsing the query for the given settings. If any mandatory query syntaxes are specified, the syntax + * will be checked to verify if it is one of the allowed mandatory query syntaxes. After this check, if the query syntax is blank and no default parser has + * been configured, the syntax {@value Constants#JEXL} will be returned, otherwise the original query syntax extracted from the query settings will be + * returned. + * + * @param settings + * the query settings + * @return the query syntax + * @throws IllegalStateException + * if mandatory query syntaxes were configured and the query syntax does not match any of them + */ + private String getValidQuerySyntax(Query settings) { String querySyntax = settings.findParameter(QueryParameters.QUERY_SYNTAX).getParameterValue(); + checkMandatoryQuerySyntaxes(querySyntax); + return (StringUtils.isBlank(querySyntax) && getParser() == null) ? Constants.JEXL : querySyntax; + } - // enforce mandatoryQuerySyntax if set + /** + * Checks if the given query syntax is one of the allowed mandatory query syntaxes, if any mandatory query syntaxes were specified. 
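+     * For example, if the mandatory syntaxes are configured as {@code [LUCENE]}, a request that omits the query syntax parameter or specifies JEXL will be
+     * rejected with an {@link IllegalStateException}.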
+ * + * @param querySyntax + * @throws IllegalStateException + * if {@link ShardQueryLogic#mandatoryQuerySyntax} is not null and the given query syntax is either empty or not present within the set of + * mandatory query syntaxes + */ + private void checkMandatoryQuerySyntaxes(String querySyntax) { if (null != this.mandatoryQuerySyntax) { if (StringUtils.isEmpty(querySyntax)) { throw new IllegalStateException("Must specify one of the following syntax options: " + this.mandatoryQuerySyntax); @@ -336,47 +416,10 @@ public String getJexlQueryString(Query settings) throws ParseException { } } } + } - QueryParser querySyntaxParser = getParser(); - - if (StringUtils.isBlank(querySyntax)) { - // Default to the class's query parser when one is not provided - // Falling back to Jexl when one is not set on this class - if (null == querySyntaxParser) { - querySyntax = "JEXL"; - } - } else if (!"JEXL".equals(querySyntax)) { - if (null == querySyntaxParsers) { - throw new IllegalStateException("Query syntax parsers not configured"); - } - - querySyntaxParser = querySyntaxParsers.get(querySyntax); - - if (null == querySyntaxParser) { - // No parser was specified, try to default to the parser on the - // class - querySyntaxParser = getParser(); - - if (null == querySyntaxParser) { - throw new IllegalArgumentException("QueryParser not configured for syntax: " + querySyntax); - } - } - } - - if (null == originalQuery) { - throw new IllegalArgumentException("Query cannot be null"); - } else { - if ("JEXL".equals(querySyntax)) { - queryString = originalQuery; - } else { - QueryNode node = querySyntaxParser.parse(originalQuery); - queryString = node.getOriginalQuery(); - if (log.isTraceEnabled()) { - log.trace("luceneQueryString: " + originalQuery + " --> jexlQueryString: " + queryString); - } - } - } - return queryString; + private String getQuerySyntaxOrDefault(String querySyntax) { + return StringUtils.isBlank(querySyntax) && getParser() == null ? Constants.JEXL : querySyntax; } public void initialize(ShardQueryConfiguration config, AccumuloClient client, Query settings, Set auths) throws Exception { @@ -420,33 +463,7 @@ public void initialize(ShardQueryConfiguration config, AccumuloClient client, Qu dateIndexHelper.setTimeTravel(config.isDateIndexTimeTravel()); } - // If the current query planner is a DefaultQueryPlanner or a FederatedQueryPlanner, get the query model if possible. 
- QueryPlanner queryPlanner = getQueryPlanner(); - DefaultQueryPlanner defaultQueryPlanner = null; - if (queryPlanner instanceof DefaultQueryPlanner) { - defaultQueryPlanner = (DefaultQueryPlanner) queryPlanner; - } else if (queryPlanner instanceof FederatedQueryPlanner) { - defaultQueryPlanner = ((FederatedQueryPlanner) queryPlanner).getQueryPlanner(); - } - - if (defaultQueryPlanner != null) { - defaultQueryPlanner.setMetadataHelper(metadataHelper); - defaultQueryPlanner.setDateIndexHelper(dateIndexHelper); - - QueryModelProvider queryModelProvider = defaultQueryPlanner.getQueryModelProviderFactory().createQueryModelProvider(); - if (queryModelProvider instanceof MetadataHelperQueryModelProvider) { - ((MetadataHelperQueryModelProvider) queryModelProvider).setMetadataHelper(metadataHelper); - ((MetadataHelperQueryModelProvider) queryModelProvider).setConfig(config); - } - - if (null != queryModelProvider.getQueryModel()) { - queryModel = queryModelProvider.getQueryModel(); - } - } - - if (this.queryModel == null) { - loadQueryModel(metadataHelper, config); - } + initializeQueryModel(config, metadataHelper, dateIndexHelper); getQueryPlanner().setCreateUidsIteratorClass(createUidsIteratorClass); getQueryPlanner().setUidIntersector(uidIntersector); @@ -488,6 +505,40 @@ public void initialize(ShardQueryConfiguration config, AccumuloClient client, Qu stopwatch.stop(); } + private QueryModel initializeQueryModel(ShardQueryConfiguration config, MetadataHelper metadataHelper, DateIndexHelper dateIndexHelper) + throws TableNotFoundException, ExecutionException, InstantiationException, IllegalAccessException { + + // If the current query planner is a DefaultQueryPlanner or a FederatedQueryPlanner, get the query model if possible. + QueryPlanner queryPlanner = getQueryPlanner(); + DefaultQueryPlanner defaultQueryPlanner = null; + if (queryPlanner instanceof DefaultQueryPlanner) { + defaultQueryPlanner = (DefaultQueryPlanner) queryPlanner; + } else if (queryPlanner instanceof FederatedQueryPlanner) { + defaultQueryPlanner = ((FederatedQueryPlanner) queryPlanner).getQueryPlanner(); + } + + if (defaultQueryPlanner != null) { + defaultQueryPlanner.setMetadataHelper(metadataHelper); + defaultQueryPlanner.setDateIndexHelper(dateIndexHelper); + + QueryModelProvider queryModelProvider = defaultQueryPlanner.getQueryModelProviderFactory().createQueryModelProvider(); + if (queryModelProvider instanceof MetadataHelperQueryModelProvider) { + ((MetadataHelperQueryModelProvider) queryModelProvider).setMetadataHelper(metadataHelper); + ((MetadataHelperQueryModelProvider) queryModelProvider).setConfig(config); + } + + if (null != queryModelProvider.getQueryModel()) { + queryModel = queryModelProvider.getQueryModel(); + } + } + + if (this.queryModel == null) { + loadQueryModel(metadataHelper, config); + } + + return this.queryModel; + } + private void setupQueryPlanner(ShardQueryConfiguration config) throws TableNotFoundException, ExecutionException, InstantiationException, IllegalAccessException { MetadataHelper metadataHelper = prepareMetadataHelper(config.getClient(), this.getMetadataTableName(), config.getAuthorizations(), config.isRawTypes()); @@ -1344,7 +1395,163 @@ public void close() { log.error("Caught exception trying to close Scheduler", e); } } + } + + @Override + public Object validateQuery(AccumuloClient client, Query settings, Set auths, boolean expandFields, boolean expandValues) throws Exception { + this.config = ShardQueryConfiguration.create(this, settings); + if (log.isTraceEnabled()) { + 
log.trace("Initializing ShardQueryLogic for query validation: " + System.identityHashCode(this) + '(' + + (this.getSettings() == null ? "empty" : this.getSettings().getId()) + ')'); + } + + // todo - maybe unnecessary, we could just return early if no rules configured. + if (validationRules == null || validationRules.isEmpty()) { + throw new IllegalStateException("Query validation rules not configured."); + } + + // todo - verify if we should allow these to be configured, or always stick to an established default. + this.config.setExpandFields(expandFields); + this.config.setExpandValues(expandValues); + + // Set the connector and authorizations for the config object. + config.setClient(client); + config.setAuthorizations(auths); + config.setMaxScannerBatchSize(getMaxScannerBatchSize()); + config.setMaxIndexBatchSize(getMaxIndexBatchSize()); + + // Load the query parameters. + loadQueryParameters(config, settings); + + // Initialize the metadata helper and query model. + MetadataHelper metadataHelper = prepareMetadataHelper(config.getClient(), this.getMetadataTableName(), config.getAuthorizations(), config.isRawTypes()); + initializeQueryModel(config, metadataHelper, null); + config.setQueryModel(this.queryModel); + + // Set up the initial validation configuration. + ShardQueryValidationConfiguration validationConfig = new ShardQueryValidationConfiguration(); + validationConfig.setQuerySettings(settings); + validationConfig.setQueryConfiguration(config); + validationConfig.setMetadataHelper(metadataHelper); + validationConfig.setTypeMetadata(metadataHelper.getTypeMetadata()); + + // Fetch the query syntax considered to be correct for the query. + String querySyntax = getValidQuerySyntax(settings); + + // Create a copy of the rules. Rules will be removed from this list as they are executed so that we do not execute the same rule twice. + List unexecutedRules = new ArrayList<>(getValidationRules()); + QueryValidationResult result = new QueryValidationResult(); + + // If the query syntax is not JEXL, i.e. LUCENE or otherwise, fetch the query parser and see if it supplies a lucene syntax parser. If so, the query + // needs to be validated against all LUCENE-specific rules that support the given syntax. + if (!querySyntax.equals(Constants.JEXL)) { + // Fetch the query parser for the query syntax. + QueryParser queryParser = getQueryParser(querySyntax); + // If the query parser is one that parses LUCENE, parse the query to LUCENE. + if (queryParser instanceof LuceneSyntaxQueryParser) { + org.apache.lucene.queryparser.flexible.core.nodes.QueryNode luceneQuery; + try { + luceneQuery = ((LuceneSyntaxQueryParser) queryParser).parseToLuceneQueryNode(settings.getQuery()); + } catch (Exception e) { + if (log.isTraceEnabled()) { + log.trace("Failed to parse query " + settings.getQuery() + " to LUCENE for syntax " + querySyntax + ": " + System.identityHashCode(this) + + '(' + (this.getSettings() == null ? "empty" : this.getSettings().getId()) + ')', e); + } + QueryException exception = new QueryException("Failed to parse query as " + querySyntax, e, + DatawaveErrorCode.INVALID_SYNTAX_PARSE_ERROR.getErrorCode()); + result.setException(exception); + return result; + } + + // Update the validation configuration with the parsed lucene + validationConfig.setParsedQuery(luceneQuery); + validationConfig.setQueryString(settings.getQuery()); + + // Validate the lucene query against all rules that support the syntax. 
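+                // Rules that cannot validate the LUCENE form (e.g. JEXL-only rules) are left in unexecutedRules so that they are evaluated against the
+                // JEXL form of the query further below.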
+ Iterator ruleIter = unexecutedRules.iterator(); + while (ruleIter.hasNext()) { + QueryRule rule = ruleIter.next(); + try { + // Check if the rule supports validating a query of the query's syntax. + if (rule.canValidate(validationConfig)) { + // Validate the query against the rule's criteria. + result.addRuleResult(rule.validate(validationConfig)); + // Remove the rule from the underlying list so that it is not executed again later. + ruleIter.remove(); + } + } catch (Exception e) { + QueryException exception = new QueryException("Error occurred when validating against rule " + rule.getName(), e); + result.setException(exception); + return result; + } + } + } + } + + // Some rules expect to validate a JEXL query. Even if the query was originally provided in a syntax other than JEXL, e.g. LUCENE, we should validate + // the JEXL version of the query against the remaining rules. + + // Parse the query to JEXL. + String jexlQuery; + try { + jexlQuery = getJexlQueryString(settings); + config.setQueryTree(JexlASTHelper.parseAndFlattenJexlQuery(jexlQuery)); + } catch (Exception e) { + if (log.isTraceEnabled()) { + log.trace("Failed to parse query to JEXL: " + System.identityHashCode(this) + '(' + + (this.getSettings() == null ? "empty" : this.getSettings().getId()) + ')', e); + } + QueryException exception = new QueryException("Failed to parse query as JEXL", e, DatawaveErrorCode.INVALID_SYNTAX_PARSE_ERROR.getErrorCode()); + result.setException(exception); + return result; + } + + // Normalize the JEXL query on a very basic level, and apply the query model to the query. + // Extract any query options and add them to the configuration. + Map optionsMap = new HashMap<>(); + if (jexlQuery.contains(QueryFunctions.QUERY_FUNCTION_NAMESPACE + ':')) { + // only do the extra tree visit if the function is present + config.setQueryTree(QueryOptionsFromQueryVisitor.collect(config.getQueryTree(), optionsMap)); + if (!optionsMap.isEmpty()) { + QueryOptionsSwitch.apply(optionsMap, config); + } + } + + // Ensure any nodes with the literal on the left and the identifier on the right are re-ordered. + config.setQueryTree(InvertNodeVisitor.invertSwappedNodes(config.getQueryTree())); + // Uppercase all identifiers. + config.setQueryTree(ShardQueryUtils.upperCaseIdentifiers(metadataHelper, config, config.getQueryTree())); + // Flatten the tree. + config.setQueryTree(TreeFlatteningRebuilder.flatten(config.getQueryTree())); + // Apply the query model. + config.setQueryTree(ShardQueryUtils.applyQueryModel(config.getQueryTree(), config, metadataHelper.getAllFields(config.getDatatypeFilter()), + this.queryModel)); + + // todo - should any other normalization steps be applied? + + // Update the configurations with the target syntax JEXL and the jexl query string. Execute any remaining rules that expect to run against a JEXL query. + validationConfig.setParsedQuery(config.getQueryTree()); + validationConfig.setQueryString(JexlStringBuildingVisitor.buildQuery(config.getQueryTree())); + + // Validate the JEXL query against all rules that support JEXL. + Iterator ruleIter = unexecutedRules.iterator(); + while (ruleIter.hasNext()) { + QueryRule rule = ruleIter.next(); + try { + // Check if the rule supports validating a JEXL query. + if (rule.canValidate(validationConfig)) { + // Validate the query against the rule's criteria. 
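+                    // Unlike the LUCENE pass above, the rule is not removed from the list here since this is the final validation pass.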
+ result.addRuleResult(rule.validate(validationConfig)); + } + } catch (Exception e) { + QueryException exception = new QueryException("Error occurred when validating against rule " + rule.getName(), e); + result.setException(exception); + return result; + } + + } + return result; } @Override @@ -3003,4 +3210,36 @@ public void setFieldIndexHoleMinThreshold(double fieldIndexHoleMinThreshold) { public double getFieldIndexHoleMinThreshold(int fieldIndexHoleMinThreshold) { return getConfig().getFieldIndexHoleMinThreshold(); } + + public List getValidationRules() { + return validationRules; + } + + public void setValidationRules(List validationRules) { + if (validationRules == null) { + this.validationRules = List.of(); + } else { + this.validationRules = new ArrayList<>(); + for (QueryRule rule : validationRules) { + QueryRule copy = rule.copy(); + // If a rule name was not specified, use the name of the class for easier identification later. + if (StringUtils.isBlank(copy.getName())) { + copy.setName(copy.getClass().getSimpleName()); + } + this.validationRules.add(copy); + } + } + } + + @Override + public Transformer getQueryValidationResponseTransformer() { + if (this.validationResponseTransformer == null) { + this.validationResponseTransformer = new QueryValidationResultTransformer(); + } + return validationResponseTransformer; + } + + public void setValidationResponseTransformer(Transformer queryValidationResponseTransformer) { + this.validationResponseTransformer = queryValidationResponseTransformer; + } } diff --git a/warehouse/query-core/src/main/java/datawave/query/transformer/QueryValidationResultTransformer.java b/warehouse/query-core/src/main/java/datawave/query/transformer/QueryValidationResultTransformer.java new file mode 100644 index 00000000000..305b78e61f5 --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/transformer/QueryValidationResultTransformer.java @@ -0,0 +1,79 @@ +package datawave.query.transformer; + +import java.util.ArrayList; +import java.util.List; + +import datawave.webservice.query.exception.QueryException; +import datawave.webservice.query.exception.QueryExceptionType; +import org.apache.commons.collections4.Transformer; + +import datawave.query.rules.QueryRuleResult; +import datawave.query.rules.QueryValidationResult; +import datawave.webservice.result.QueryValidationResponse; + +/** + * A tranformer that will transform a {@link QueryValidationResult} to a {@link QueryValidationResponse}. + */ +public class QueryValidationResultTransformer implements Transformer { + + private static final String UNNAMED_RULE = "UNNAMED_RULE"; + + @Override + public QueryValidationResponse transform(Object input) { + if (!(input instanceof QueryValidationResult)) { + throw new IllegalArgumentException("Input must be an instance of " + QueryValidationResult.class); + } + + QueryValidationResult validationResult = (QueryValidationResult) input; + + // Transform any rule result that has either a message and/or exception to a result object. + List results = new ArrayList<>(); + List executedRules = new ArrayList<>(); + for (QueryRuleResult ruleResult : validationResult.getRuleResults()) { + // If a rule name was not given, use UNNAMED_RULE. + String ruleName = ruleResult.getRuleName(); + if (ruleName == null || ruleName.isEmpty()) { + ruleName = UNNAMED_RULE; + } + + // Add the rule result only if it has a message or exception. + if (ruleResult.hasMessageOrException()) { + // Transform the exception if one is present. 
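+                // Only the exception's message and the message of its immediate cause are carried over to the response.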
+ QueryExceptionType exceptionType = null; + Exception exception = ruleResult.getException(); + if (exception != null) { + exceptionType = new QueryExceptionType(); + exceptionType.setMessage(exception.getMessage()); + Throwable throwable = exception.getCause(); + if (throwable != null) { + exceptionType.setCause(throwable.getMessage()); + } + } + + results.add(new QueryValidationResponse.Result(ruleName, ruleResult.getMessages(), exceptionType)); + } + + // Retain a list of all executed rules. + executedRules.add(ruleName); + } + + QueryValidationResponse response = new QueryValidationResponse(); + + // If we had any results, add them to the response. + if (!results.isEmpty()) { + response.setHasResults(true); + response.setResults(results); + } + + // Set the executed rules. + response.setExecutedRules(executedRules); + + // If an exception was captured in the validation result, ensure it is added to the response. + Exception exception = validationResult.getException(); + if (exception != null) { + response.addException(validationResult.getException()); + } + + return response; + } +} diff --git a/warehouse/query-core/src/main/java/datawave/query/util/ShardQueryUtils.java b/warehouse/query-core/src/main/java/datawave/query/util/ShardQueryUtils.java new file mode 100644 index 00000000000..fdf77d2da9d --- /dev/null +++ b/warehouse/query-core/src/main/java/datawave/query/util/ShardQueryUtils.java @@ -0,0 +1,206 @@ +package datawave.query.util; + +import java.util.Collection; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.accumulo.core.client.TableNotFoundException; +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.log4j.Logger; + +import com.google.common.cache.Cache; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; + +import datawave.query.attributes.ExcerptFields; +import datawave.query.attributes.UniqueFields; +import datawave.query.common.grouping.GroupFields; +import datawave.query.config.ShardQueryConfiguration; +import datawave.query.exceptions.DatawaveFatalQueryException; +import datawave.query.jexl.visitors.CaseSensitivityVisitor; +import datawave.query.jexl.visitors.QueryModelVisitor; +import datawave.query.model.QueryModel; +import datawave.webservice.query.exception.DatawaveErrorCode; +import datawave.webservice.query.exception.QueryException; + +public class ShardQueryUtils { + + private static final Logger log = Logger.getLogger(ShardQueryUtils.class); + + /** + * Uppercases all identifiers in the given script, as well as in the configuration's group-by fields, unique fields, excerpt fields, user projection fields, + * disallow listed fields, and limit fields. 
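+     * For example (illustrative), a projection field of {@code foo} would become {@code FOO}; field values are left unchanged.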
+     *
+     * @param metadataHelper
+     *            the metadata helper
+     * @param config
+     *            the query configuration
+     * @param script
+     *            the query script
+     * @return the updated query
+     */
+    public static ASTJexlScript upperCaseIdentifiers(MetadataHelper metadataHelper, ShardQueryConfiguration config, ASTJexlScript script) {
+        GroupFields groupFields = config.getGroupFields();
+        if (groupFields != null && groupFields.hasGroupByFields()) {
+            groupFields.setMaxFields(toUpperCase(groupFields.getMaxFields()));
+            groupFields.setSumFields(toUpperCase(groupFields.getSumFields()));
+            groupFields.setGroupByFields(toUpperCase(groupFields.getGroupByFields()));
+            groupFields.setAverageFields(toUpperCase(groupFields.getAverageFields()));
+            groupFields.setCountFields(toUpperCase(groupFields.getCountFields()));
+            groupFields.setMinFields(toUpperCase(groupFields.getMinFields()));
+
+            // If grouping is set, we must make the projection fields match all the group-by fields and aggregation fields.
+            config.setProjectFields(groupFields.getProjectionFields());
+        } else {
+            Set<String> projectFields = config.getProjectFields();
+
+            if (projectFields != null && !projectFields.isEmpty()) {
+                config.setProjectFields(toUpperCase(projectFields));
+            }
+        }
+
+        UniqueFields uniqueFields = config.getUniqueFields();
+        if (uniqueFields != null && !uniqueFields.isEmpty()) {
+            Sets.newHashSet(uniqueFields.getFields()).stream().forEach(s -> uniqueFields.replace(s, s.toUpperCase()));
+        }
+
+        ExcerptFields excerptFields = config.getExcerptFields();
+        if (excerptFields != null && !excerptFields.isEmpty()) {
+            Sets.newHashSet(excerptFields.getFields()).stream().forEach(s -> excerptFields.replace(s, s.toUpperCase()));
+        }
+
+        Set<String> userProjection = config.getRenameFields();
+        if (userProjection != null && !userProjection.isEmpty()) {
+            config.setRenameFields(toUpperCase(userProjection));
+        }
+
+        Set<String> disallowlistedFields = config.getDisallowlistedFields();
+        if (disallowlistedFields != null && !disallowlistedFields.isEmpty()) {
+            config.setDisallowlistedFields(toUpperCase(disallowlistedFields));
+        }
+
+        Set<String> limitFields = config.getLimitFields();
+        if (limitFields != null && !limitFields.isEmpty()) {
+            config.setLimitFields(toUpperCase(limitFields));
+        }
+
+        return (CaseSensitivityVisitor.upperCaseIdentifiers(config, metadataHelper, script));
+    }
+
+    /**
+     * Applies the query model to the given query script and query configuration, remapping the projection, group-by, unique, excerpt, user projection,
+     * disallowlist, and limit fields as needed. Messages documenting the field expansion changes are logged at the trace level.
+     *
+     * @param script
+     *            the query script
+     * @param config
+     *            the query configuration
+     * @param allFields
+     *            the set of all fields for the configured datatype filter
+     * @param queryModel
+     *            the query model
+     * @return the updated query
+     */
+    public static ASTJexlScript applyQueryModel(ASTJexlScript script, ShardQueryConfiguration config, Set<String> allFields, QueryModel queryModel) {
+        // Create the inverse of the reverse mapping: {display field name => db field name}
+        // A reverse mapping is always many to one, therefore the inverted reverse mapping can be one to many.
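+        // e.g. a reverse mapping of {DB_FIELD_A -> DISPLAY, DB_FIELD_B -> DISPLAY} inverts to {DISPLAY -> [DB_FIELD_A, DB_FIELD_B]} (names illustrative).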
+ Multimap inverseReverseModel = HashMultimap.create(); + queryModel.getReverseQueryMapping().entrySet().forEach(entry -> inverseReverseModel.put(entry.getValue(), entry.getKey())); + inverseReverseModel.putAll(queryModel.getForwardQueryMapping()); + + // Update the projection fields. + Collection projectFields = config.getProjectFields(), disallowlistedFields = config.getDisallowlistedFields(), + limitFields = config.getLimitFields(); + if (projectFields != null && !projectFields.isEmpty()) { + projectFields = queryModel.remapParameter(projectFields, inverseReverseModel); + if (log != null && log.isTraceEnabled()) { + log.trace("Updated projection set using query model to: " + projectFields); + } + config.setProjectFields(Sets.newHashSet(projectFields)); + } + + // Update the group-by fields. + GroupFields groupFields = config.getGroupFields(); + if (groupFields != null && groupFields.hasGroupByFields()) { + groupFields.remapFields(inverseReverseModel, queryModel.getReverseQueryMapping()); + if (log.isTraceEnabled()) { + log.trace("Updating group-by fields using query model to: " + groupFields); + } + config.setGroupFields(groupFields); + + // If grouping is set, we must make the projection fields match all the group-by fields and aggregation fields. + config.setProjectFields(groupFields.getProjectionFields()); + } + + // Update the unique fields. + UniqueFields uniqueFields = config.getUniqueFields(); + if (uniqueFields != null && !uniqueFields.isEmpty()) { + uniqueFields.remapFields(inverseReverseModel); + if (log.isTraceEnabled()) { + log.trace("Updated unique set using query model to: " + uniqueFields.getFields()); + } + config.setUniqueFields(uniqueFields); + } + + // Update the excerpt fields. + ExcerptFields excerptFields = config.getExcerptFields(); + if (excerptFields != null && !excerptFields.isEmpty()) { + excerptFields.expandFields(inverseReverseModel); + if (log.isTraceEnabled()) { + log.trace("Updated excerpt fields using query model to: " + excerptFields.getFields()); + } + config.setExcerptFields(excerptFields); + } + + // Update the user projection fields. + Set userProjection = config.getRenameFields(); + if (userProjection != null && !userProjection.isEmpty()) { + userProjection = Sets.newHashSet(queryModel.remapParameterEquation(userProjection, inverseReverseModel)); + if (log.isTraceEnabled()) { + log.trace("Updated user projection fields using query model to: " + userProjection); + } + config.setRenameFields(userProjection); + } + + // Update the disallow fields. + if (config.getDisallowlistedFields() != null && !config.getDisallowlistedFields().isEmpty()) { + disallowlistedFields = queryModel.remapParameter(disallowlistedFields, inverseReverseModel); + if (log.isTraceEnabled()) { + log.trace("Updated disallowlist set using query model to: " + disallowlistedFields); + } + config.setDisallowlistedFields(Sets.newHashSet(disallowlistedFields)); + } + + // Update the limit fields. + if (config.getLimitFields() != null && !config.getLimitFields().isEmpty()) { + limitFields = queryModel.remapParameterEquation(limitFields, inverseReverseModel); + if (log.isTraceEnabled()) { + log.trace("Updated limitFields set using query model to: " + limitFields); + } + config.setLimitFields(Sets.newHashSet(limitFields)); + } + + return (QueryModelVisitor.applyModel(script, queryModel, allFields, config.getNoExpansionFields(), config.getLenientFields(), + config.getStrictFields())); + } + + /** + * Returns a copy of the given set with all strings uppercased. 
+ * + * @param strs + * the strings + * @return the uppercased strings + */ + private static Set toUpperCase(Collection strs) { + return strs.stream().map(String::toUpperCase).collect(Collectors.toSet()); + } + + /** + * Do not allow this class to be instatiated. + */ + private ShardQueryUtils() { + throw new UnsupportedOperationException(); + } +} diff --git a/warehouse/query-core/src/main/java/org/apache/lucene/queryparser/flexible/core/nodes/LuceneQueryNodeHelper.java b/warehouse/query-core/src/main/java/org/apache/lucene/queryparser/flexible/core/nodes/LuceneQueryNodeHelper.java new file mode 100644 index 00000000000..8a2fe8e6207 --- /dev/null +++ b/warehouse/query-core/src/main/java/org/apache/lucene/queryparser/flexible/core/nodes/LuceneQueryNodeHelper.java @@ -0,0 +1,30 @@ +package org.apache.lucene.queryparser.flexible.core.nodes; + +import org.apache.commons.lang3.StringUtils; + +public class LuceneQueryNodeHelper { + + /** + * Returns whether the given field is considered a default field for the given node. + * + * @param node + * the node + * @param field + * the field + * @return true if the given field is null or blank, or considered a default field by the node, or false otherwise + */ + public static boolean isDefaultField(QueryNode node, CharSequence field) { + if (StringUtils.isBlank(field)) { + return true; + } + if (node instanceof QueryNodeImpl) { + return ((QueryNodeImpl) node).isDefaultField(field); + } else { + return false; + } + } + + private LuceneQueryNodeHelper() { + throw new UnsupportedOperationException(); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/FetchFunctionFieldsVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/FetchFunctionFieldsVisitorTest.java new file mode 100644 index 00000000000..ac1abac0923 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/FetchFunctionFieldsVisitorTest.java @@ -0,0 +1,414 @@ +package datawave.query.jexl.visitors; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.ParseException; +import org.apache.commons.lang3.tuple.Pair; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import datawave.data.type.LcNoDiacriticsType; +import datawave.query.jexl.JexlASTHelper; +import datawave.query.util.MockMetadataHelper; + +public class FetchFunctionFieldsVisitorTest { + + private final Set> functions = new HashSet<>(); + private final MockMetadataHelper metadataHelper = new MockMetadataHelper(); + private String query; + + private final Set expected = new HashSet<>(); + + @Before + public void setUp() throws Exception { + metadataHelper.addNormalizers("FOO", Collections.singleton(new LcNoDiacriticsType())); + metadataHelper.addNormalizers("FOO2", Collections.singleton(new LcNoDiacriticsType())); + metadataHelper.addNormalizers("FOO3", Collections.singleton(new LcNoDiacriticsType())); + metadataHelper.addNormalizers("FOO4", Collections.singleton(new LcNoDiacriticsType())); + } + + @After + public void tearDown() throws Exception { + query = null; + functions.clear(); + clearExpected(); + } + + @Test + public void testGroupByFunction() throws ParseException { + givenQuery("f:groupby(FOO,BAR)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "groupby", "FOO", "BAR")); + assertResults(); + } + + @Test + public void testNoExpansionFunction() throws 
ParseException { + givenQuery("f:noExpansion(FOO,BAR)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "noExpansion", "FOO", "BAR")); + assertResults(); + } + + @Test + public void testLenientFunction() throws ParseException { + givenQuery("f:lenient(FOO,BAR)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "lenient", "FOO", "BAR")); + assertResults(); + } + + @Test + public void testStrictFunction() throws ParseException { + givenQuery("f:strict(FOO,BAR)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "strict", "FOO", "BAR")); + assertResults(); + } + + @Test + public void testExcerptFieldsFunction() throws ParseException { + givenQuery("f:excerpt_fields(FOO,BAR)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "excerpt_fields", "FOO", "BAR")); + assertResults(); + } + + @Test + public void testUniqueFunction() throws ParseException { + givenQuery("f:unique(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique", "FOO", "BAR", "BAT")); + assertResults(); + + givenQuery("f:unique('FOO[ALL]','BAR[DAY]','BAT[MINUTE,SECOND]')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testUniqueByYearFunction() throws ParseException { + givenQuery("f:unique_by_year(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique_by_year", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testUniqueByMonthFunction() throws ParseException { + givenQuery("f:unique_by_month(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique_by_month", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testUniqueByDayFunction() throws ParseException { + givenQuery("f:unique_by_day(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique_by_day", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testUniqueByHourFunction() throws ParseException { + givenQuery("f:unique_by_hour(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique_by_hour", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testUniqueByTenthOfHourFunction() throws ParseException { + givenQuery("f:unique_by_tenth_of_hour(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique_by_tenth_of_hour", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testUniqueByMinuteFunction() throws ParseException { + givenQuery("f:unique_by_minute(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique_by_minute", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testUniqueBySecondFunction() throws ParseException { + givenQuery("f:unique_by_second(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique_by_second", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testUniqueByMillisecondFunction() throws ParseException { + givenQuery("f:unique_by_millisecond(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "unique_by_millisecond", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testRenameFunction() throws ParseException { + givenQuery("f:rename('FOO=FOO2','BAR=BAR2')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "rename", "FOO", "BAR")); + assertResults(); + } + + @Test + public void testSumFunction() throws ParseException { + 
givenQuery("f:sum(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "sum", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testMinFunction() throws ParseException { + givenQuery("f:min(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "min", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testMaxFunction() throws ParseException { + givenQuery("f:max(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "max", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testAverageFunction() throws ParseException { + givenQuery("f:average(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "average", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testCountFunction() throws ParseException { + givenQuery("f:count(FOO,BAR,BAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "count", "FOO", "BAR", "BAT")); + assertResults(); + } + + @Test + public void testContentFunction_phrase() throws ParseException { + givenQuery("content:phrase(FOO, termOffsetMap, 'bar', 'baz')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("content", "phrase", "FOO")); + assertResults(); + + // Multi-fielded + givenQuery("content:phrase((FOO|FOO2), termOffsetMap, 'bar', 'baz')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("content", "phrase", "FOO", "FOO2")); + assertResults(); + } + + @Test + public void testContentFunction_scoredPhrase() throws ParseException { + givenQuery("content:scoredPhrase(FOO, -1.5, termOffsetMap, 'bar', 'baz')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("content", "scoredPhrase", "FOO")); + assertResults(); + + // Multi-fielded + givenQuery("content:scoredPhrase((FOO|FOO2), -1.5, termOffsetMap, 'bar', 'baz')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("content", "scoredPhrase", "FOO", "FOO2")); + assertResults(); + } + + @Test + public void testContentFunction_adjacent() throws ParseException { + givenQuery("content:adjacent(FOO, termOffsetMap, 'bar', 'baz')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("content", "adjacent", "FOO")); + assertResults(); + } + + @Test + public void testContentFunction_within() throws ParseException { + givenQuery("content:within(FOO, 5, termOffsetMap, 'bar', 'baz')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("content", "within", "FOO")); + assertResults(); + } + + @Test + public void testFilterIncludeRegex() throws ParseException { + givenQuery("filter:includeRegex(FOO, 'bar.*')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "includeRegex", "FOO")); + assertResults(); + } + + @Test + public void testFilterExcludeRegex() throws ParseException { + givenQuery("filter:excludeRegex(FOO, 'bar.*')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "excludeRegex", "FOO")); + assertResults(); + } + + @Test + public void testFilterTimeFunction() throws ParseException { + givenQuery("filter:timeFunction(DEATH_DATE,BIRTH_DATE,'-','>',2522880000000L)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "timeFunction", "DEATH_DATE", "BIRTH_DATE")); + assertResults(); + } + + @Test + public void testFilterIsNullFunction() throws ParseException { + givenQuery("filter:isNull(FOO)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "isNull", "FOO")); + assertResults(); + } + + @Test + public void testFilterOccurrenceFunction() throws ParseException { + 
givenQuery("filter:occurrence(FOO,'>',3)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "occurrence", "FOO")); + assertResults(); + } + + @Test + public void testFilterBetweenDatesFunction() throws ParseException { + givenQuery("filter:betweenDates(FOO, '20140101', '20140102')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "betweenDates", "FOO")); + assertResults(); + } + + @Test + public void testFilterAfterDateFunction() throws ParseException { + givenQuery("filter:afterDate(FOO, '20140101')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "afterDate", "FOO")); + assertResults(); + } + + @Test + public void testFilterBeforeDateFunction() throws ParseException { + givenQuery("filter:beforeDate(FOO, '20140101')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "beforeDate", "FOO")); + assertResults(); + } + + @Test + public void testFilterBetweenLoadDatesFunction() throws ParseException { + givenQuery("filter:betweenLoadDates(FOO, '20140101', '20140102')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "betweenLoadDates", "FOO")); + assertResults(); + } + + @Test + public void testFilterAfterLoadDateFunction() throws ParseException { + givenQuery("filter:afterLoadDate(FOO, '20140101')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "afterLoadDate", "FOO")); + assertResults(); + } + + @Test + public void testFilterBeforeLoadDateFunction() throws ParseException { + givenQuery("filter:beforeLoadDate(FOO, '20140101')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "beforeLoadDate", "FOO")); + assertResults(); + } + + @Test + public void testGeoWaveFunction_intersects() throws ParseException { + givenQuery("geowave:intersects(FOO, 'POINT(4 4)')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "intersects", "FOO")); + assertResults(); + } + + @Test + public void testGeoWaveFunction_overlaps() throws ParseException { + givenQuery("geowave:overlaps(FOO, 'POINT(5 5)')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "overlaps", "FOO")); + assertResults(); + } + + @Test + public void testGeoWaveFunction_intersectsAndOverlaps() throws ParseException { + givenQuery("geowave:intersects(FOO, 'POINT(4 4)') || geowave:overlaps(FOO, 'POINT(5 5)')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "intersects", "FOO")); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "overlaps", "FOO")); + assertResults(); + } + + @Test + public void testGeoWaveFunction_contains() throws ParseException { + givenQuery("geowave:contains(FOO, 'POINT(5 5)')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "contains", "FOO")); + assertResults(); + } + + @Test + public void testGeoWaveFunction_covers() throws ParseException { + givenQuery("geowave:covers(FOO, 'POINT(5 5)')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "covers", "FOO")); + assertResults(); + } + + @Test + public void testGeoWaveFunction_covered_by() throws ParseException { + givenQuery("geowave:covered_by(FOO, 'POINT(5 5)')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "covered_by", "FOO")); + assertResults(); + } + + @Test + public void testGeoWaveFunction_crosses() throws ParseException { + givenQuery("geowave:crosses(FOO, 'POINT(5 5)')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "crosses", "FOO")); + assertResults(); + } + + @Test + public void testGeoWaveFunction_within() 
throws ParseException { + givenQuery("geowave:within(FOO, 'POINT(5 5)')"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "within", "FOO")); + assertResults(); + } + + @Test + public void testMultipleFunctionsWithoutFilter() throws ParseException { + givenQuery("geowave:within(FOO, 'POINT(5 5)') && filter:includeRegex(BAR, 'abc') && f:strict(BAT,HAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("geowave", "within", "FOO")); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "includeRegex", "BAR")); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "strict", "BAT", "HAT")); + assertResults(); + } + + @Test + public void testMultipleFunctionsWithFilter() throws ParseException { + givenFunctionFilter("filter", "includeRegex"); + givenQuery("geowave:within(FOO, 'POINT(5 5)') && filter:includeRegex(BAR, 'abc') && f:strict(BAT,HAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "includeRegex", "BAR")); + assertResults(); + } + + @Test + public void testMultipleFilters() throws ParseException { + givenFunctionFilter("filter", "includeRegex"); + givenFunctionFilter("f", "strict"); + givenQuery("geowave:within(FOO, 'POINT(5 5)') && filter:includeRegex(BAR, 'abc') && f:strict(BAT,HAT)"); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("filter", "includeRegex", "BAR")); + expect(FetchFunctionFieldsVisitor.FunctionFields.of("f", "strict", "BAT", "HAT")); + assertResults(); + } + + @Test + public void testFilterWithNoMatches() throws ParseException { + givenFunctionFilter("f", "lenient"); + givenQuery("geowave:within(FOO, 'POINT(5 5)') && filter:includeRegex(BAR, 'abc') && f:strict(BAT,HAT)"); + assertResults(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void givenFunctionFilter(String namespace, String function) { + functions.add(Pair.of(namespace, function)); + } + + private void expect(FetchFunctionFieldsVisitor.FunctionFields functionFields) { + this.expected.add(functionFields); + } + + private void clearExpected() { + this.expected.clear(); + } + + private void assertResults() throws ParseException { + ASTJexlScript script = JexlASTHelper.parseJexlQuery(query); + Set actual = FetchFunctionFieldsVisitor.fetchFields(script, functions, metadataHelper); + Assert.assertEquals(expected, actual); + clearExpected(); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/FieldsWithNumericValuesVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/FieldsWithNumericValuesVisitorTest.java new file mode 100644 index 00000000000..4ee1e2b4496 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/FieldsWithNumericValuesVisitorTest.java @@ -0,0 +1,95 @@ +package datawave.query.jexl.visitors; + +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.ParseException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import datawave.query.jexl.JexlASTHelper; + +class FieldsWithNumericValuesVisitorTest { + + private String query; + private final Set expectedFields = new LinkedHashSet<>(); + + @AfterEach + void tearDown() { + query = null; + expectedFields.clear(); + } + + /** + * Test the various field operators with 
string values. + * + * @param operator + * the operator + */ + @ParameterizedTest + @ValueSource(strings = {"==", "!=", "<", ">", "<=", ">="}) + void testOperatorsWithTextValue(String operator) throws ParseException { + givenQuery("FOO " + operator + " 'abc'"); + + // Do not expect any fields. + assertResult(); + } + + /** + * Test the various field operators with boolean values. + * + * @param operator + * the operator + */ + @ParameterizedTest + @ValueSource(strings = {"==", "!=", "<", ">", "<=", ">="}) + void testOperatorsWithBooleanValue(String operator) throws ParseException { + givenQuery("FOO " + operator + " true"); + + // Do not expect any fields. + assertResult(); + } + + /** + * Test the various field operators with numeric values. + * + * @param operator + * the operator + */ + @ParameterizedTest + @ValueSource(strings = {"==", "!=", "<", ">", "<=", ">="}) + void testOperatorsWithNumericValue(String operator) throws ParseException { + givenQuery("FOO " + operator + " 1"); + expectFields("FOO"); + assertResult(); + } + + /** + * Test multiple fields with numeric values. + */ + @Test + void testMultipleFieldsWithNumericValues() throws ParseException { + givenQuery("FOO == 'abc' && BAR != true || HAT > 3 || BAT < 5 || HEN <= 15 || VEE >= 20"); + expectFields("HAT", "BAT", "HEN", "VEE"); + assertResult(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void expectFields(String... fields) { + this.expectedFields.addAll(List.of(fields)); + } + + private void assertResult() throws ParseException { + ASTJexlScript script = JexlASTHelper.parseJexlQuery(query); + Set actual = FieldsWithNumericValuesVisitor.getFields(script); + Assertions.assertEquals(expectedFields, actual); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryFieldsVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryFieldsVisitorTest.java index df8eedd612a..18f5d408c16 100644 --- a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryFieldsVisitorTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryFieldsVisitorTest.java @@ -104,7 +104,132 @@ public void testUnionOfNestedIntersection() throws ParseException { test(query, Sets.newHashSet("FOO", "FOO2", "FOO3", "FOO4")); } - // Some functions + // Query functions + + @Test + public void testGroupByFunction() throws ParseException { + String query = "f:groupby(FOO,BAR)"; + test(query, Sets.newHashSet("FOO", "BAR")); + } + + @Test + public void testNoExpansionFunction() throws ParseException { + String query = "f:noExpansion(FOO,BAR)"; + test(query, Sets.newHashSet("FOO", "BAR")); + } + + @Test + public void testLenientFunction() throws ParseException { + String query = "f:lenient(FOO,BAR)"; + test(query, Sets.newHashSet("FOO", "BAR")); + } + + @Test + public void testStrictFunction() throws ParseException { + String query = "f:strict(FOO,BAR)"; + test(query, Sets.newHashSet("FOO", "BAR")); + } + + @Test + public void testExcerptFieldsFunction() throws ParseException { + String query = "f:excerpt_fields(FOO,BAR)"; + test(query, Sets.newHashSet("FOO", "BAR")); + } + + @Test + public void testUniqueFunction() throws ParseException { + String query = "f:unique(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + + query = "f:unique('FOO[ALL]','BAR[DAY]','BAT[MINUTE,SECOND]')"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testUniqueByYearFunction() throws ParseException 
{ + String query = "f:unique_by_year(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testUniqueByMonthFunction() throws ParseException { + String query = "f:unique_by_month(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testUniqueByDayFunction() throws ParseException { + String query = "f:unique_by_day(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testUniqueByHourFunction() throws ParseException { + String query = "f:unique_by_hour(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testUniqueByTenthOfHourFunction() throws ParseException { + String query = "f:unique_by_tenth_of_hour(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testUniqueByMinuteFunction() throws ParseException { + String query = "f:unique_by_minute(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testUniqueBySecondFunction() throws ParseException { + String query = "f:unique_by_second(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testUniqueByMillisecondFunction() throws ParseException { + String query = "f:unique_by_millisecond(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testRenameFunction() throws ParseException { + String query = "f:rename('FOO=FOO2','BAR=BAR2')"; + test(query, Sets.newHashSet("FOO", "BAR")); + } + + @Test + public void testSumFunction() throws ParseException { + String query = "f:sum(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testMinFunction() throws ParseException { + String query = "f:min(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testMaxFunction() throws ParseException { + String query = "f:max(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testAverageFunction() throws ParseException { + String query = "f:average(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + @Test + public void testCountFunction() throws ParseException { + String query = "f:count(FOO,BAR,BAT)"; + test(query, Sets.newHashSet("FOO", "BAR", "BAT")); + } + + // Content functions @Test public void testContentFunction_phrase() throws ParseException { @@ -120,6 +245,20 @@ public void testContentFunction_phrase() throws ParseException { test(query, Collections.singleton("FOO")); } + @Test + public void testContentFunction_scoredPhrase() throws ParseException { + String query = "content:scoredPhrase(FOO, -1.5, termOffsetMap, 'bar', 'baz')"; + test(query, Collections.singleton("FOO")); + + // Multi-fielded + query = "content:scoredPhrase((FOO|FOO2), -1.5, termOffsetMap, 'bar', 'baz')"; + test(query, Sets.newHashSet("FOO", "FOO2")); + + // Fields within intersection + query = "(content:scoredPhrase(-1.5, termOffsetMap, 'bar', 'baz') && FOO == 'bar' && FOO == 'baz')"; + test(query, Collections.singleton("FOO")); + } + @Test public void testContentFunction_adjacent() throws ParseException { String query = "content:adjacent(FOO, termOffsetMap, 'bar', 'baz')"; @@ -154,6 +293,60 @@ public void testFilterExcludeRegex() throws ParseException { test(query, Collections.singleton("FOO")); } + @Test + public void testFilterTimeFunction() throws ParseException { + String query = 
"filter:timeFunction(DEATH_DATE,BIRTH_DATE,'-','>',2522880000000L)"; + test(query, Sets.newHashSet("DEATH_DATE", "BIRTH_DATE")); + } + + @Test + public void testFilterIsNullFunction() throws ParseException { + String query = "filter:isNull(FOO)"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testFilterOccurrenceFunction() throws ParseException { + String query = "filter:occurrence(FOO,'>',3)"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testFilterBetweenDatesFunction() throws ParseException { + String query = "filter:betweenDates(FOO, '20140101', '20140102')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testFilterAfterDateFunction() throws ParseException { + String query = "filter:afterDate(FOO, '20140101')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testFilterBeforeDateFunction() throws ParseException { + String query = "filter:beforeDate(FOO, '20140101')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testFilterBetweenLoadDatesFunction() throws ParseException { + String query = "filter:betweenLoadDates(FOO, '20140101', '20140102')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testFilterAfterLoadDateFunction() throws ParseException { + String query = "filter:afterLoadDate(FOO, '20140101')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testFilterBeforeLoadDateFunction() throws ParseException { + String query = "filter:beforeLoadDate(FOO, '20140101')"; + test(query, Collections.singleton("FOO")); + } + // Geowave functions @Test public void testGeoWaveFunction_intersects() throws ParseException { @@ -173,8 +366,37 @@ public void testGeoWaveFunction_intersectsAndOverlaps() throws ParseException { test(query, Collections.singleton("FOO")); } - // Misc. tests + @Test + public void testGeoWaveFunction_contains() throws ParseException { + String query = "geowave:contains(FOO, 'POINT(5 5)')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testGeoWaveFunction_covers() throws ParseException { + String query = "geowave:covers(FOO, 'POINT(5 5)')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testGeoWaveFunction_covered_by() throws ParseException { + String query = "geowave:covered_by(FOO, 'POINT(5 5)')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testGeoWaveFunction_crosses() throws ParseException { + String query = "geowave:crosses(FOO, 'POINT(5 5)')"; + test(query, Collections.singleton("FOO")); + } + + @Test + public void testGeoWaveFunction_within() throws ParseException { + String query = "geowave:within(FOO, 'POINT(5 5)')"; + test(query, Collections.singleton("FOO")); + } + // Misc. 
tests @Test public void testAnyFieldAndNoField() throws ParseException { String query = "_ANYFIELD_ == 'bar' && _NOFIELD_ == 'baz'"; diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryPatternsVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryPatternsVisitorTest.java new file mode 100644 index 00000000000..ec0d42ea610 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/QueryPatternsVisitorTest.java @@ -0,0 +1,100 @@ +package datawave.query.jexl.visitors; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.ParseException; +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; + +import datawave.query.jexl.JexlASTHelper; + +public class QueryPatternsVisitorTest { + + private String query; + private final Set expected = new HashSet<>(); + + @After + public void tearDown() throws Exception { + query = null; + expected.clear(); + } + + @Test + public void testNoPatterns() throws ParseException { + givenQuery("BAR == '1'"); + assertResults(); + } + + @Test + public void testER() throws ParseException { + givenQuery("BAR == '1' && FOO =~ '1234.*\\d'"); + expectPatterns("1234.*\\d"); + assertResults(); + + } + + @Test + public void testNR() throws ParseException { + givenQuery("BAR == '1' && FOO !~ '1234.*\\d'"); + expectPatterns("1234.*\\d"); + assertResults(); + } + + @Test + public void testFilterFunctionIncludeRegex() throws ParseException { + givenQuery("A == '1' && filter:includeRegex(B,'*2*')"); + expectPatterns("*2*"); + assertResults(); + } + + @Test + public void testFilterFunctionExcludeRegex() throws ParseException { + givenQuery("A == '1' && filter:excludeRegex(B,'*2*')"); + expectPatterns("*2*"); + assertResults(); + } + + @Test + public void testFilterFunctionGetAllMatches() throws ParseException { + givenQuery("A == '1' && filter:getAllMatches(B,'*2*')"); + expectPatterns("*2*"); + assertResults(); + } + + @Test + public void testDoubleSidedER() throws ParseException { + givenQuery("A =~ B"); + assertResults(); + } + + @Test + public void testDoubleSidedNR() throws ParseException { + givenQuery("A !~ B"); + assertResults(); + } + + @Test + public void testCombo() throws ParseException { + givenQuery("BAR == '1' && FOO =~ '1234.*\\d' && FOO !~ '444.*' && filter:includeRegex(B,'*2*')"); + expectPatterns("1234.*\\d", "444.*", "*2*"); + assertResults(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void expectPatterns(String... 
patterns) { + expected.addAll(Arrays.asList(patterns)); + } + + private void assertResults() throws ParseException { + ASTJexlScript script = JexlASTHelper.parseJexlQuery(query); + Set patterns = QueryPatternsVisitor.findPatterns(script); + Assert.assertEquals(expected, patterns); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/UnescapedSpecialCharactersVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/UnescapedSpecialCharactersVisitorTest.java new file mode 100644 index 00000000000..f1275f2ee8f --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/jexl/visitors/UnescapedSpecialCharactersVisitorTest.java @@ -0,0 +1,275 @@ +package datawave.query.jexl.visitors; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.ParseException; +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; + +import datawave.query.jexl.JexlASTHelper; + +public class UnescapedSpecialCharactersVisitorTest { + + private String query; + private final Set literalExceptions = new HashSet<>(); + private boolean escapedWhitespaceRequiredForLiterals; + private final Set patternExceptions = new HashSet<>(); + private boolean escapedWhitespaceRequiredForPatterns; + + private final Multimap expectedLiterals = HashMultimap.create(); + private final Multimap expectedPatterns = HashMultimap.create(); + + @After + public void tearDown() throws Exception { + query = null; + clearExpected(); + escapedWhitespaceRequiredForLiterals = false; + escapedWhitespaceRequiredForPatterns = false; + } + + /** + * Test a literal and pattern that contain alphanumeric characters only. + */ + @Test + public void testNoSpecialCharacters() throws ParseException { + String alphanumericStr = ofChars('a', 'z') + ofChars('A', 'Z') + ofChars('0', '9'); + givenQuery("FOO == '" + alphanumericStr + "' || FOO =~ '" + alphanumericStr + "'"); + + assertResults(); + } + + private String ofChars(char start, char end) { + return IntStream.rangeClosed(start, end).mapToObj(c -> "" + ((char) c)).collect(Collectors.joining()); + } + + /** + * Test a literal and pattern with whitespace in them and do not allow unescaped whitespace. + */ + @Test + public void testUnescapedWhitespace() throws ParseException { + givenEscapedWhitespaceRequiredForLiterals(true); + givenEscapedWhitespaceRequiredForPatterns(true); + givenQuery("FOO == 'ab c' || FOO =~ 'ab cd'"); + + expectCharsForLiterals("ab c", ' '); + expectCharsForPatterns("ab cd", ' '); + + assertResults(); + } + + /** + * Test a literal and pattern with whitespace in them and allow unescaped whitespace. + */ + @Test + public void testUnescapedWhitespaceAllowed() throws ParseException { + givenEscapedWhitespaceRequiredForLiterals(false); + givenEscapedWhitespaceRequiredForPatterns(false); + givenQuery("FOO == 'ab c' || FOO =~ 'ab cd'"); + + // Do not expect any unescaped chars. + + assertResults(); + } + + /** + * Test literals and patterns with unescaped special chars at the start, in the middle, and at the end. + */ + @Test + public void testUnescapedSpecialCharAtDifferentIndexes() throws ParseException { + // Test special chars at the start of the string. 
+ givenQuery("FOO == '&abc' || FOO =~ '&abc'"); + expectCharsForLiterals("&abc", '&'); + expectCharsForPatterns("&abc", '&'); + assertResults(); + + // Test special chars in the middle of the string. + givenQuery("FOO == 'a&bc' || FOO =~ 'a&bc'"); + clearExpected(); + expectCharsForLiterals("a&bc", '&'); + expectCharsForPatterns("a&bc", '&'); + assertResults(); + + // Test special chars at the end of the string. + givenQuery("FOO == 'abc&' || FOO =~ 'abc&'"); + clearExpected(); + expectCharsForLiterals("abc&", '&'); + expectCharsForPatterns("abc&", '&'); + assertResults(); + } + + /** + * Test a literal and pattern with a special character that is allowed to be escaped. + */ + @Test + public void testSpecialCharThatIsException() throws ParseException { + givenLiteralExceptions('&'); + givenPatternExceptions('&'); + givenQuery("FOO == 'ab&c' || FOO =~ 'ab&d'"); + + // Do not expect any unescaped chars. + + assertResults(); + } + + /** + * Test a literal and pattern with a special character that is not an exception and is escaped. + */ + @Test + public void testEscapedSpecialChar() throws ParseException { + givenQuery("FOO == 'ab\\&c' || FOO =~ 'ab\\&d'"); + + // Do not expect any unescaped chars. + assertResults(); + } + + /** + * Test that when we see a double backslash, it does not escape any special characters directly after it. + */ + @Test + public void testDoubleBackslashDoesNotEscapeCharacter() throws ParseException { + // Backslashes must be doubly escaped in literals, but not patterns when parsed to JEXL. + givenQuery("FOO == 'ab\\\\\\\\&c' || FOO =~ 'ab\\\\&d'"); + expectCharsForLiterals("ab\\\\&c", '&'); + expectCharsForPatterns("ab\\\\&d", '&'); + + assertResults(); + } + + /** + * Test that when we see a triple backslash, the last backslash escapes a special character directly after it. + */ + @Test + public void testTripleBackslashEscapesCharacter() throws ParseException { + // Backslashes must be doubly escaped in literals, but not patterns when parsed to JEXL. + givenQuery("FOO == 'ab\\\\\\\\\\\\&c' || FOO =~ 'ab\\\\\\&d'"); + + // Do not expect any unescaped chars. + assertResults(); + } + + /** + * Test that an unescaped backslash in a literal will be noted, but not an unescaped backslash in a pattern since it is a regex-reserved char. + */ + @Test + public void testUnescapedBackslashInLiteral() throws ParseException { + // Backslashes must be doubly escaped in literals, but not patterns when parsed to JEXL. + givenQuery("FOO == '\\\\' || FOO =~ '\\\\'"); + clearExpected(); + expectCharsForLiterals("\\", '\\'); + + assertResults(); + } + + /** + * Test that regex-reserved characters do not get flagged as unescaped special characters in patterns. + */ + @Test + public void testRegexReservedCharacters() throws ParseException { + // This is not a valid pattern, but patterns are not compiled in the visitor, so an exception will not be thrown. + givenQuery("FOO =~ '.+*?^$()[]{}|\\\\'"); + + // Do not expect any unescaped characters. + assertResults(); + } + + /** + * Test that empty strings will not result in flagged special characters. + */ + @Test + public void testEmptyStrings() throws ParseException { + givenQuery("FOO == '' || FOO =~ ''"); + + // Do not expect any unescaped chars. + assertResults(); + } + + /** + * Test that regex patterns inside of ER, NR, and function nodes are evaluated. 
+ */ + @Test + public void testPossiblePatternLocations() throws ParseException { + givenQuery("FOO =~ 'er&' && FOO !~ 'nr&' && filter:includeRegex(FOO, 'function&')"); + expectCharsForPatterns("er&", '&'); + expectCharsForPatterns("nr&", '&'); + expectCharsForPatterns("function&", '&'); + + assertResults(); + } + + /** + * Test that literal strings for EQ, NE, LT, GT, LE, and GE nodes are evaluated. + */ + @Test + public void testPossibleLiteralLocations() throws ParseException { + givenQuery("FOO == 'eq&' || FOO != 'ne&' || FOO < 'lt&' || FOO > 'gt&' || FOO <= 'le&' || FOO >= 'ge&'"); + expectCharsForLiterals("eq&", '&'); + expectCharsForLiterals("ne&", '&'); + expectCharsForLiterals("lt&", '&'); + expectCharsForLiterals("gt&", '&'); + expectCharsForLiterals("le&", '&'); + expectCharsForLiterals("ge&", '&'); + + assertResults(); + } + + @Test + public void testMultipleSpecialCharactersFound() throws ParseException { + givenQuery("FOO == 'ab^123%34#' || FOO =~ '343&kje:jd@'"); + expectCharsForLiterals("ab^123%34#", '^', '%', '#'); + expectCharsForPatterns("343&kje:jd@", '&', ':', '@'); + + assertResults(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void givenLiteralExceptions(Character... chars) { + this.literalExceptions.addAll(List.of(chars)); + } + + private void givenEscapedWhitespaceRequiredForLiterals(boolean bool) { + this.escapedWhitespaceRequiredForLiterals = bool; + } + + private void givenPatternExceptions(Character... chars) { + this.patternExceptions.addAll(List.of(chars)); + } + + private void givenEscapedWhitespaceRequiredForPatterns(boolean bool) { + this.escapedWhitespaceRequiredForPatterns = bool; + } + + private void expectCharsForLiterals(String literal, Character... characters) { + expectedLiterals.putAll(literal, List.of(characters)); + } + + private void expectCharsForPatterns(String pattern, Character... 
characters) { + expectedPatterns.putAll(pattern, List.of(characters)); + } + + private void clearExpected() { + this.expectedLiterals.clear(); + this.expectedPatterns.clear(); + } + + private void assertResults() throws ParseException { + ASTJexlScript script = JexlASTHelper.parseJexlQuery(query); + UnescapedSpecialCharactersVisitor visitor = UnescapedSpecialCharactersVisitor.check(script, literalExceptions, escapedWhitespaceRequiredForLiterals, + patternExceptions, escapedWhitespaceRequiredForPatterns); + Multimap actualLiterals = visitor.getUnescapedCharactersInLiterals(); + Multimap actualPatterns = visitor.getUnescapedCharactersInPatterns(); + Assert.assertEquals("Unescaped chars for literals did not match expected", expectedLiterals, actualLiterals); + Assert.assertEquals("Unescaped chars for patterns did not match expected", expectedPatterns, actualPatterns); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/language/parser/jexl/TestLuceneToJexlQueryParser.java b/warehouse/query-core/src/test/java/datawave/query/language/parser/jexl/TestLuceneToJexlQueryParser.java index 4779d421239..69f0459ae0a 100644 --- a/warehouse/query-core/src/test/java/datawave/query/language/parser/jexl/TestLuceneToJexlQueryParser.java +++ b/warehouse/query-core/src/test/java/datawave/query/language/parser/jexl/TestLuceneToJexlQueryParser.java @@ -6,6 +6,7 @@ import static org.junit.Assert.fail; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler; import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor; import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline; @@ -19,9 +20,11 @@ import datawave.ingest.data.tokenize.TokenSearch; import datawave.query.Constants; import datawave.query.language.parser.ParseException; +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; import datawave.query.language.processor.lucene.QueryNodeProcessorFactory; import datawave.query.language.tree.QueryNode; import datawave.query.language.tree.ServerHeadNode; +import datawave.query.lucene.visitors.PrintingVisitor; public class TestLuceneToJexlQueryParser { diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/AmbigiousNotVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/AmbigiousNotVisitorTest.java new file mode 100644 index 00000000000..dbe149ed446 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/AmbigiousNotVisitorTest.java @@ -0,0 +1,169 @@ +package datawave.query.lucene.visitors; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.NotBooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; +import 
datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; + +public class AmbigiousNotVisitorTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + + private String query; + private final List expectedNodes = new ArrayList<>(); + + @AfterEach + void tearDown() { + query = null; + expectedNodes.clear(); + } + + /** + * Test a query that does not contain a NOT. + */ + @Test + void testQueryWithoutNOT() throws QueryNodeParseException { + givenQuery("FOO:123"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a NOT with a single unwrapped term preceding the NOT. + */ + @Test + void testNOTWithSingleUnwrappedPrecedingTerms() throws QueryNodeParseException { + givenQuery("FIELD1:abc NOT FIELD:def"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a NOT with a single wrapped term preceding the NOT. + */ + @Test + void testNOTWithSingleWrappedPrecedingTerms() throws QueryNodeParseException { + givenQuery("(FIELD1:abc) NOT FIELD:def"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a NOT with multiple wrapped terms preceding the NOT. + */ + @ParameterizedTest() + @ValueSource(strings = {"OR", "AND"}) + void testNOTWithWrappedMultiplePrecedingTerms(String junction) throws QueryNodeParseException { + givenQuery("(FIELD1:abc " + junction + " FIELD2:def) NOT FIELD:ghi"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a NOT with multiple unwrapped terms preceding the NOT. + */ + @ParameterizedTest() + @ValueSource(strings = {"OR", "AND"}) + void testNOTWithUnwrappedMultiplePrecedingTerms(String junction) throws QueryNodeParseException { + givenQuery("FIELD1:abc " + junction + " FIELD2:def NOT FIELD:ghi"); + + expectNode("FIELD1:abc " + junction + " FIELD2:def NOT FIELD:ghi"); + + assertResult(); + } + + /** + * Test a query with a NOT with multiple unwrapped terms preceding the NOT that will be automatically ANDed. + * + * @throws QueryNodeParseException + */ + @Test + void testNOTWithUnwrappedAutomaticallyAndedPreceedingTerms() throws QueryNodeParseException { + givenQuery("FIELD1:abc FIELD2:def NOT FIELD:ghi"); + + expectNode("FIELD1:abc FIELD2:def NOT FIELD:ghi"); + + assertResult(); + } + + /** + * Test a query with a NOT with multiple wrapped terms preceding the NOT that will be automatically ANDed. + * + * @throws QueryNodeParseException + */ + @Test + void testNOTWithWrappedAutomaticallyAndedPreceedingTerms() throws QueryNodeParseException { + givenQuery("(FIELD1:abc FIELD2:def) NOT FIELD:ghi"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query that does not consist entirely of a NOT. + */ + @Test + void testQueryWithTermThatIsNotPartOfNOT() throws QueryNodeParseException { + givenQuery("FIELD1:abc OR (FIELD2:abc FIELD3:def NOT FIELD4:ghi)"); + + expectNode("FIELD2:abc FIELD3:def NOT FIELD4:ghi"); + + assertResult(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void expectNode(String query) throws QueryNodeParseException { + this.expectedNodes.add((NotBooleanQueryNode) parser.parse(query, "")); + } + + private void assertResult() throws QueryNodeParseException { + QueryNode queryNode = parser.parse(query, ""); + List actual = AmbigiousNotVisitor.check(queryNode); + // Compare the node lists via their query strings. 
+ Assertions.assertThat(actual).usingElementComparator(QUERY_STR_COMPARATOR).isEqualTo(expectedNodes); + } + + /** + * A comparator implementation that will compare {@link NotBooleanQueryNode} based on their query strings. + */ + private static final Comparator QUERY_STR_COMPARATOR = new Comparator() { + + private final EscapeQuerySyntax escapedSyntax = new EscapeQuerySyntaxImpl(); + + @Override + public int compare(NotBooleanQueryNode first, NotBooleanQueryNode second) { + if (first == second) { + return 0; + } + if (first == null) { + return -1; + } + if (second == null) { + return 1; + } + return first.toQueryString(escapedSyntax).toString().compareTo(second.toQueryString(escapedSyntax).toString()); + } + }; +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/AmbiguousUnfieldedTermsVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/AmbiguousUnfieldedTermsVisitorTest.java new file mode 100644 index 00000000000..8ba9cd9a2f4 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/AmbiguousUnfieldedTermsVisitorTest.java @@ -0,0 +1,270 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; +import datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; + +class AmbiguousUnfieldedTermsVisitorTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + private static final EscapeQuerySyntax escapedSyntax = new EscapeQuerySyntaxImpl(); + + private String query; + private AmbiguousUnfieldedTermsVisitor.JUNCTION junction; + private final List expectedNodes = new ArrayList<>(); + + @AfterEach + void tearDown() { + query = null; + junction = null; + expectedNodes.clear(); + } + + /** + * Test a query with a single fielded term. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithSingleFieldedTerm(String junction) throws QueryNodeParseException { + givenQuery("FOO:abc"); + givenJunction(junction); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a single fielded term. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithWrappedSingleFieldedTerm(String junction) throws QueryNodeParseException { + givenQuery("(FOO:abc)"); + givenJunction(junction); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query that consists of a single unfielded terms. Only unfielded terms directly following a fielded term are expected. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithUnfieldedTermOnly(String junction) throws QueryNodeParseException { + givenQuery("abc"); + givenJunction(junction); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query that consists of an unfielded term before a fielded term. Only unfielded terms directly following a fielded term are expected. 
+ */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithUnfieldedTermBeforeFieldedTerm(String junction) throws QueryNodeParseException { + givenQuery("abc FOO:def"); + givenJunction(junction); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with quoted phrases. Only unquoted unfielded terms are expected. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithQuotedPhrases(String junction) throws QueryNodeParseException { + givenQuery("FOO:\"abc\" " + junction + " \"def\""); + givenJunction(junction); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query where terms are wrapped directly after a field name. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithWrappedTerms(String junction) throws QueryNodeParseException { + givenQuery("FOO:(abc " + junction + " def)"); + givenJunction(junction); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query where terms are wrapped multiple times in a nested fashion. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithNestedWrappedTerms(String junction) throws QueryNodeParseException { + givenQuery("FOO:(((abc " + junction + " def)))"); + givenJunction(junction); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query where a single unfielded term follows a fielded term. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithSingleUnfieldedTermAfterFieldedTerm(String junction) throws QueryNodeParseException { + givenQuery("FOO:abc " + junction + " def"); + givenJunction(junction); + + // Expect the terms. + expectNode("FOO:abc " + junction + " def"); + + assertResult(); + } + + /** + * Test a query where multiple unfielded terms follows a fielded term. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithMultipleUnfieldedTermAfterFieldedTerm(String junction) throws QueryNodeParseException { + givenQuery("FOO:abc " + junction + " def " + junction + " efg"); + givenJunction(junction); + + // Expect the terms. + expectNode("FOO:abc " + junction + " def " + junction + " efg"); + + assertResult(); + } + + /** + * Test a query where multiple unfielded terms follows a fielded term are all grouped. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithFullyGroupedFieldedTermAndUnfieldedTerms(String junction) throws QueryNodeParseException { + givenQuery("(FOO:abc " + junction + " def " + junction + " efg)"); + givenJunction(junction); + + // Expect the terms. + expectNode("(FOO:abc " + junction + " def " + junction + " efg)"); + + assertResult(); + } + + /** + * Test a query with unfielded terms nested within multiple groups. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithNestedUnfieldedTerms(String junction) throws QueryNodeParseException { + givenQuery("(FOO:abc " + junction + " (def " + junction + " efg " + junction + "(jkl)))"); + givenJunction(junction); + + // Expect the terms. + expectNode("(FOO:abc " + junction + " (def " + junction + " efg " + junction + "(jkl)))"); + + assertResult(); + } + + /** + * Test a query where multiple grouped unfielded terms follows a fielded term. 
+ */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithFieldedTermAndGroupedUnfieldedTerms(String junction) throws QueryNodeParseException { + givenQuery("FOO:abc " + junction + " (def " + junction + " efg)"); + givenJunction(junction); + + // Expect the terms. + expectNode("FOO:abc " + junction + " (def " + junction + " efg)"); + + assertResult(); + } + + /** + * Test a query where the fielded term is in a sibling group. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithFieldedTermInSiblingGroup(String junction) throws QueryNodeParseException { + givenQuery("((FOO:abc " + junction + " def) " + junction + " (aaa " + junction + " bbb))"); + givenJunction(junction); + + // Only expect the terms from the first group sibling. + expectNode("(FOO:abc " + junction + " def)"); + + assertResult(); + } + + /** + * Test a query with multiple sets of ambiguous phrases. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testQueryWithMultipleAmbiguousPhrases(String junction) throws QueryNodeParseException { + givenQuery("FOO:abc " + junction + " def " + junction + " BAR:aaa " + junction + " bbb"); + givenJunction(junction); + + expectNode("FOO:abc " + junction + " def"); + expectNode("BAR:aaa " + junction + " bbb"); + + assertResult(); + } + + /** + * Test a query with a variety of ambiguous phrases. + */ + @ParameterizedTest + @ValueSource(strings = {"OR", "AND"}) + void testMixedComplexityQuery(String junction) throws QueryNodeParseException { + String otherJunction = junction.equals("OR") ? "AND" : "OR"; + givenQuery("FOO:aaa " + otherJunction + " bbb " + otherJunction + " (BAR:aaa " + junction + " bbb " + junction + " ccc " + junction + + " HAT:\"ear\" nose) " + junction + " (aaa " + junction + " bbb " + junction + " VEE:eee " + junction + " 123 " + junction + " (gee " + + junction + " \"wiz\")) " + otherJunction + " (EGG:yolk " + junction + " shell)"); + givenJunction(junction); + + expectNode("BAR:aaa " + junction + " bbb " + junction + " ccc"); + expectNode("VEE:eee " + junction + " 123"); + expectNode("(EGG:yolk " + junction + " shell)"); + + assertResult(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void givenJunction(String junction) { + this.junction = AmbiguousUnfieldedTermsVisitor.JUNCTION.valueOf(junction); + } + + private void expectNode(String node) throws QueryNodeParseException { + expectedNodes.add(parser.parse(node, "")); + } + + private void assertResult() throws QueryNodeParseException { + QueryNode queryNode = parser.parse(query, ""); + List actual = AmbiguousUnfieldedTermsVisitor.check(queryNode, junction); + // Compare the lists via their query strings. 
+ List actualStrs = actual.stream().map(node -> node.toQueryString(escapedSyntax).toString()).collect(Collectors.toList()); + List expectedStrs = expectedNodes.stream().map(node -> node.toQueryString(escapedSyntax).toString()).collect(Collectors.toList()); + Assertions.assertEquals(expectedStrs, actualStrs); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/BaseVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/BaseVisitorTest.java new file mode 100644 index 00000000000..6c77293587f --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/BaseVisitorTest.java @@ -0,0 +1,46 @@ +package datawave.query.lucene.visitors; + +import java.util.List; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; +import org.junit.Test; + +public class BaseVisitorTest { + + @Test + public void testQuery() throws QueryNodeParseException { + parseAndPrint("(FIELD:1234 OR 456) AND (BAR:435 OR BAR:54)"); + } + + private void parseAndPrint(String query) throws QueryNodeParseException { + System.out.println("Original query: " + query); + parseAndPrintStandard(query); + parseAndVisit(query); + } + + private void parseAndPrintStandard(String query) throws QueryNodeParseException { + StandardSyntaxParser parser = new StandardSyntaxParser(); + QueryNode node = parser.parse(query, ""); + System.out.println("Structure:"); + printNodeStructure(node, ""); + System.out.println("toString()"); + System.out.println(node.toString()); + } + + private void parseAndVisit(String query) throws QueryNodeParseException { + StandardSyntaxParser parser = new StandardSyntaxParser(); + QueryNode node = parser.parse(query, ""); + BaseVisitor visitor = new BaseVisitor(); + visitor.visit(node, null); + } + + private void printNodeStructure(QueryNode node, String indent) { + List children = node.getChildren(); + children = children == null ? List.of() : children; + System.out.println(node.getClass().getName() + " " + children.size()); + String newIndent = indent + " "; + children.forEach((c) -> printNodeStructure(c, newIndent)); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/CoreVisitorTests.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/CoreVisitorTests.java new file mode 100644 index 00000000000..52f3b0d0228 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/CoreVisitorTests.java @@ -0,0 +1,277 @@ +package datawave.query.lucene.visitors; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.junit.Test; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; + +public class CoreVisitorTests { + + /** + * Permutations to Test + * + *
+     * <p>
+     * List of test cases using {@code assertValid} and {@code assertInvalid} statements that cover all possible forms of queries according to the modified
+     * Lucene syntax:
+     * <p>
+     * (Only includes cases that are readable queries given the provided syntax guidelines)
+     * <p>
+     * <pre>
    +     * {@code
    +     * // Single term queries
    +     * assertValid("FIELD1:abc");                        // Simple term
    +     * assertValid("FIELD1:ab*");                        // Wildcard term
    +     * assertValid("FIELD1:a?c");                        // Single character wildcard
    +     * assertValid("FIELD1:*lector");                    // Leading wildcard (as per syntax)
    +     * assertValid("abc*");                              // Wildcard without field
    +     * assertValid("\"quick brown dog\"");               // Phrase query (as per syntax)
    +     * assertValid("\"quick brown dog\"~20");            // Proximity search (as per syntax)
    +     * assertValid("#FUNCTION(ARG1, ARG2)");             // Function query
    +     *
    +     * // Range queries
    +     * assertValid("FIELD1:[begin TO end]");             // Inclusive range (as per syntax)
    +     * assertValid("FIELD1:{begin TO end}");             // Exclusive range (as per syntax)
    +     *
    +     * // Multiple terms with implicit AND (default operator)
    +     * assertValid("FIELD1:abc FIELD2:def");             // Implicit AND between terms
    +     * assertValid("FIELD1:abc FIELD2:def FIELD3:ghi");  // Multiple terms
    +     *
    +     * // Explicit AND operator
    +     * assertValid("FIELD1:abc AND FIELD2:def");         // Explicit AND
    +     * assertValid("FIELD1:abc AND FIELD2:def AND FIELD3:ghi");
    +     *
    +     * // Using OR operator
    +     * assertValid("FIELD1:abc OR FIELD2:def");          // Simple OR
    +     * assertValid("FIELD1:abc OR FIELD2:def OR FIELD3:ghi");
    +     * assertValid("FIELD1:abc AND FIELD2:def OR FIELD3:ghi");  // Combining AND and OR
    +     *
    +     * // Using NOT operator between terms
    +     * assertValid("FIELD1:abc NOT FIELD2:def");                         // NOT between terms
    +     * assertValid("FIELD1:abc NOT FIELD2:def NOT FIELD3:ghi");          // Multiple NOTs between terms
    +     * assertInvalid("FIELD1:abc NOT FIELD2:def FIELD3:ghi");            // Ambiguous NOT placement
    +     *
    +     * // Parentheses usage with NOT between terms
    +     * assertValid("(FIELD1:abc NOT FIELD2:def) AND FIELD3:ghi");        // NOT within parentheses
    +     * assertValid("FIELD1:abc AND (FIELD2:def NOT FIELD3:ghi)");        // NOT within parentheses
    +     * assertInvalid("(FIELD1:abc NOT FIELD2:def FIELD3:ghi)");          // Ambiguous within parentheses
    +     *
    +     * // Complex expressions without unary NOT
    +     * assertValid("FIELD1:abc NOT (FIELD2:def AND FIELD3:ghi)");        // NOT before group
    +     * assertValid("(FIELD1:abc AND FIELD2:def) NOT (FIELD3:ghi OR FIELD4:jkl)"); // NOT between groups
    +     *
    +     * // Nested parentheses without unary NOT
    +     * assertValid("((FIELD1:abc NOT FIELD2:def))");                     // Nested parentheses with NOT
    +     * assertValid("(FIELD1:abc NOT (FIELD2:def AND FIELD3:ghi))");      // NOT with nested group
    +     *
    +     * // Functions and phrases within complex expressions
    +     * assertValid("#FUNCTION(ARG1, ARG2) NOT (\"quick brown dog\"~20 AND FIELD1:abc)");
    +     * assertInvalid("#FUNCTION(ARG1, ARG2) NOT \"quick brown dog\"~20 FIELD1:abc"); // Ambiguous without parentheses
    +     *
    +     * // Wildcards and range queries with NOT between terms
    +     * assertValid("FIELD1:selec* NOT FIELD2:selec?or");                 // Wildcards with NOT
    +     * assertValid("FIELD1:[begin TO end] NOT FIELD2:{begin TO end}");    // Range queries with NOT
    +     * assertInvalid("FIELD1:[begin TO end] NOT FIELD2:{begin TO end} FIELD3:ghi"); // Ambiguous without operator
    +     *
    +     * // Escaping special characters in terms
    +     * assertValid("FIELD1:foo\\-bar NOT FIELD2:abc");                   // Escaped hyphen with NOT
    +     * assertValid("FIELD1:foo\\+bar AND FIELD2:abc");                   // Escaped plus with AND
    +     * assertValid("FIELD1:foo\\@bar OR FIELD2:abc");                    // Escaped at symbol with OR
    +     *
    +     * // Queries with proximity searches and functions combined
    +     * assertValid("\"quick brown dog\"~5 NOT (\"lazy fox\"~10 AND FIELD1:abc)");
    +     * assertValid("#FUNCTION(ARG1) AND (#FUNCTION(ARG1, ARG2) NOT FIELD2:def)");
    +     *
    +     * // Queries with special terms and modifiers
    +     * assertValid("FIELD1:\"complex term\" NOT FIELD2:[begin TO end]");
    +     * assertInvalid("FIELD1:{* TO end} NOT FIELD2:selec* NOT FIELD3:selec?or"); // Ambiguous without parentheses
    +     * // Corrected unambiguous version
    +     * assertValid("FIELD1:{* TO end} NOT (FIELD2:selec* OR FIELD3:selec?or)");  // Parentheses resolve ambiguity
    +     *
    +     * // Testing precedence and grouping without unary NOT
    +     * assertValid("FIELD1:abc AND ((FIELD2:def OR FIELD3:ghi) NOT FIELD4:jkl)");
    +     * assertInvalid("FIELD1:abc AND FIELD2:def OR FIELD3:ghi NOT FIELD4:jkl FIELD5:mno"); // Ambiguous without parentheses
    +     *
    +     * }
+     * </pre>
+     * <p>
+     * Explanation:
+     * <ul>
+     * <li>Single Term Queries: Tests basic terms, wildcards, phrases, proximity searches, and functions as per the provided syntax.</li>
+     * <li>Range Queries: Covers inclusive and exclusive ranges according to the syntax.</li>
+     * <li>Implicit AND Operator: Checks multiple terms without explicit operators.</li>
+     * <li>Explicit AND/OR Operators: Validates queries using {@code AND} and {@code OR}.</li>
+     * <li>NOT Operator Between Terms: Tests placement of {@code NOT} between terms and identifies ambiguous cases.</li>
+     * <li>Parentheses Usage with NOT: Ensures parentheses group expressions correctly when using {@code NOT}.</li>
+     * <li>Complex Expressions: Combines multiple operators and parentheses without unary {@code NOT}.</li>
+     * <li>Functions and Phrases: Tests advanced terms in complex queries.</li>
+     * <li>Wildcards and Range Queries with NOT: Validates {@code NOT} with wildcards and ranges between terms.</li>
+     * <li>Precedence and Grouping: Checks operator precedence and grouping without unary {@code NOT}.</li>
+     * <li>Escaping Special Characters: Ensures special characters are properly escaped in various contexts.</li>
+     * <li>Edge Cases: Covers ambiguous cases and ensures they are properly flagged as invalid. For example, multiple {@code NOT} operators
+     * without parentheses can lead to ambiguity.</li>
+     * <li>Reserved Words as Field Names or Terms: Confirms that reserved words can be used as field names or terms where appropriate.</li>
+     * </ul>
+     * <p>
+     * These test cases cover a wide range of possible query forms, ensuring that both valid and invalid (ambiguous) queries are properly identified according
+     * to the provided syntax guidelines.
    + */ + + // private boolean logQueryTrees = true; + // + // /*Specific test cases*/ + // + // /** + // * Ivan's ask #1 + // */ + // @Test + // public void testAmbiguousJunctions() throws Exception { + // assertInvalid("FIELD1:1234 5678", new Exception()); // Ambiguous without grouping + // assertInvalid("(FIELD1:1234 5678)", new Exception()); // Ambiguous without grouping + // assertInvalid("FIELD1:1234 AND 5678", new Exception()); // Ambiguous without grouping + // assertInvalid("FIELD1:1234 OR 5678", new Exception()); // Ambiguous without grouping + // assertValid("FIELD1:(1234 AND 5678)"); // Grouped terms + // assertValid("FIELD1:(1234 OR 5678)"); // Grouped terms + // assertValid("FIELD1:(1234 5678)"); // Grouped terms + // } + // + // /** + // * Ivan's ask #2 + // */ + // @Test + // public void testUnquotedPhrases() throws Exception { + // assertInvalid("FIELD:term1 term2", new Exception()); // Ambiguous without quotes + // assertInvalid("FIELD:term1 AND term2", new Exception()); // Ambiguous without quotes + // assertInvalid("FIELD:term1 OR term2", new Exception()); // Ambiguous without quotes + // assertInvalid("\"FIELD:term1 term2\"", new Exception()); //Wrong quote placement + // assertInvalid("\"FIELD:term1 AND term2\"", new Exception());//Wrong quote placement + // assertInvalid("\"FIELD:term1 OR term2\"", new Exception()); //Wrong quote placement + // assertValid("FIELD:\"term1 term2\""); + // assertValid("FIELD:\"term1 AND term2\""); + // assertValid("FIELD:\"term1 OR term2\""); + // } + // + // /*General test cases*/ + // + // @Test + // public void testSingleTermQueries() throws Exception { + // assertValid("FIELD1:abc"); // Simple term + // assertValid("FIELD1:ab*"); // Wildcard term + // assertValid("FIELD1:a?c"); // Single character wildcard + // assertValid("FIELD1:*lector"); // Leading wildcard (as per syntax) + // assertValid("abc*"); // Wildcard without field + // assertValid("\"quick brown dog\""); // Phrase query + // assertValid("\"quick brown dog\"~20"); // Proximity search + // assertValid("#FUNCTION(ARG1, ARG2)"); // Function query + // } + // + // @Test + // public void testRangeQueries() throws Exception { + // assertValid("FIELD1:[begin TO end]"); // Inclusive range + // assertValid("FIELD1:{begin TO end}"); // Exclusive range + // } + // + // @Test + // public void testImplicitAndOperator() throws Exception { + // assertValid("FIELD1:abc FIELD2:def"); // Implicit AND between terms + // assertValid("FIELD1:abc FIELD2:def FIELD3:ghi"); // Multiple terms + // } + // + // @Test + // public void testExplicitAndOrOperators() throws Exception { + // assertValid("FIELD1:abc AND FIELD2:def"); // Explicit AND + // assertValid("FIELD1:abc AND FIELD2:def AND FIELD3:ghi"); + // assertValid("FIELD1:abc OR FIELD2:def"); // Simple OR + // assertValid("FIELD1:abc OR FIELD2:def OR FIELD3:ghi"); + // assertInvalid("FIELD1:abc AND FIELD2:def OR FIELD3:ghi", new Exception()); // Combining AND and OR + // assertInvalid("FIELD1:abc OR FIELD2:def AND FIELD3:ghi", new Exception()); // Also ambiguous + // } + // + // @Test + // public void testGroupedTerms() throws Exception { + // assertValid("FIELD1:(abc def)"); // Multiple terms in field value + // assertValid("FIELD1:(abc AND def)"); // Multiple terms in field value + // assertValid("FIELD1:(abc OR def)"); // Multiple terms in field value + // assertInvalid("FIELD1:abc def", new Exception()); // Ambiguous without grouping + // assertInvalid("(FIELD1:abc def)", new Exception()); // Ambiguous without grouping + // 
assertInvalid("FIELD1:abc AND def", new Exception()); // Ambiguous without grouping + // assertInvalid("FIELD1:abc OR def", new Exception()); // Ambiguous without grouping + // } + // + // + // @Test + // public void testNotOperatorBetweenTerms() throws Exception { + // assertValid("FIELD1:abc NOT FIELD2:def"); // NOT between two terms + // assertInvalid("FIELD1:abc NOT FIELD2:def NOT FIELD3:ghi", new Exception()); // Multiple NOTs between terms + // assertInvalid("FIELD1:abc NOT FIELD2:def FIELD3:ghi", new Exception()); // Ambiguous NOT placement + // } + // + // @Test + // public void testParenthesesUsageWithNot() throws Exception { + // assertValid("(FIELD1:abc NOT FIELD2:def) AND FIELD3:ghi"); // NOT within parentheses + // assertValid("FIELD1:abc AND (FIELD2:def NOT FIELD3:ghi)"); // NOT within parentheses + // assertInvalid("(FIELD1:abc NOT FIELD2:def FIELD3:ghi)", new Exception()); // Ambiguous within parentheses + // } + // + // @Test + // public void testComplexExpressions() throws Exception { + // assertValid("FIELD1:abc NOT (FIELD2:def AND FIELD3:ghi)"); // NOT before group + // assertValid("(FIELD1:abc AND FIELD2:def) NOT (FIELD3:ghi OR FIELD4:jkl)"); // NOT between groups + // } + // + // @Test + // public void testNestedParenthesesWithNot() throws Exception { + // assertValid("((FIELD1:abc NOT FIELD2:def))"); // Nested parentheses with NOT + // assertValid("(FIELD1:abc NOT (FIELD2:def AND FIELD3:ghi))"); // NOT with nested group + // } + // + // + // @Test + // public void testFunctionsAndPhrasesInComplexExpressions() throws Exception { + // assertValid("#FUNCTION(ARG1, ARG2) NOT (\"quick brown dog\"~20 AND FIELD1:abc)"); + // assertInvalid("#FUNCTION(ARG1, ARG2) NOT \"quick brown dog\"~20 FIELD1:abc", new Exception()); // Ambiguous without parentheses + // } + // + // + // @Test + // public void testWildcardsAndRangeQueriesWithNot() throws Exception { + // assertValid("FIELD1:selec* NOT FIELD2:selec?or"); // Wildcards with NOT + // assertValid("FIELD1:[begin TO end] NOT FIELD2:{begin TO end}"); // Range queries with NOT + // assertInvalid("FIELD1:[begin TO end] NOT FIELD2:{begin TO end} FIELD3:ghi", new Exception()); // Ambiguous without operator + // } + // + // + // @Test + // public void testEscapingSpecialCharacters() throws Exception { + // assertValid("FIELD1:foo\\-bar NOT FIELD2:abc"); // Escaped hyphen with NOT + // assertValid("FIELD1:foo\\+bar AND FIELD2:abc"); // Escaped plus with AND + // assertValid("FIELD1:foo\\@bar OR FIELD2:abc"); // Escaped at symbol with OR + // } + // + // + // @Test + // public void testQueriesWithProximitySearchesAndFunctionsCombined() throws Exception { + // assertValid("\"quick brown dog\"~5 NOT (\"lazy fox\"~10 AND FIELD1:abc)"); + // assertValid("#FUNCTION(ARG1) AND (#FUNCTION(ARG1, ARG2) NOT FIELD2:def)"); + // } + // + // + // @Test + // public void testQueriesWithSpecialTermsAndModifiersAndNot() throws Exception { + // assertValid("FIELD1:\"complex term\" NOT FIELD2:[begin TO end]"); + // assertInvalid("FIELD1:{* TO end} NOT FIELD2:selec* NOT FIELD3:selec?or", new Exception()); // Ambiguous without parentheses + // assertValid("FIELD1:{* TO end} NOT (FIELD2:selec* OR FIELD3:selec?or)"); // Parentheses resolve ambiguity + // } + // + // + // @Test + // public void testTestingPrecedenceAndGrouping() throws Exception { + // assertValid("FIELD1:abc AND ((FIELD2:def OR FIELD3:ghi) NOT FIELD4:jkl)"); + // assertInvalid("FIELD1:abc AND FIELD2:def OR FIELD3:ghi NOT FIELD4:jkl FIELD5:mno", new Exception()); // Ambiguous without parentheses 
+ // } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidIncludeExcludeArgsVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidIncludeExcludeArgsVisitorTest.java new file mode 100644 index 00000000000..dfaaa325ebb --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidIncludeExcludeArgsVisitorTest.java @@ -0,0 +1,244 @@ +package datawave.query.lucene.visitors; + +import static datawave.query.lucene.visitors.InvalidIncludeExcludeArgsVisitor.REASON.NO_ARGS; +import static datawave.query.lucene.visitors.InvalidIncludeExcludeArgsVisitor.REASON.NO_ARGS_AFTER_BOOLEAN; +import static datawave.query.lucene.visitors.InvalidIncludeExcludeArgsVisitor.REASON.UNEVEN_ARGS; +import static datawave.query.lucene.visitors.InvalidIncludeExcludeArgsVisitor.REASON.UNEVEN_ARGS_AFTER_BOOLEAN; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; + +public class InvalidIncludeExcludeArgsVisitorTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + private String query; + private final List expected = new ArrayList<>(); + + @AfterEach + public void tearDown() throws Exception { + query = null; + expected.clear(); + } + + /** + * Test a query that does not have the INCLUDE or EXCLUDE function. + */ + @Test + public void testQueryWithNoIncludeOrExcludeFunction() throws QueryNodeParseException { + givenQuery("FOO:'abc'"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single field and value. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithSingleFieldAndValue(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(FOO,'abc')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single field and value. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithMultipleFieldAndValue(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(FOO,'abc',BAR,'def')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single field and value after an OR boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithSingleFieldAndValueAfterOR(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(OR,FOO,'abc')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single field and value after an AND boolean. 
+ * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithSingleFieldAndValueAfterAND(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(AND,FOO,'abc')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with multiple fields and values after an OR boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithMultipleFieldsAndValuesAfterOR(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(OR,FOO,'abc',BAR,'def')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with multiple fields and values after an AND boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithMultipleFieldsAndValuesAfterAND(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(AND,FOO,'abc',BAR,'def')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with just a single arg. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithSingleArg(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(FIELD)"); + expect(new InvalidIncludeExcludeArgsVisitor.InvalidFunction(name, List.of("FIELD"), UNEVEN_ARGS)); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with an uneven number of arguments greater than one without a boolean arg. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithUnevenArgs(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(FIELD1,'value',FIELD2)"); + expect(new InvalidIncludeExcludeArgsVisitor.InvalidFunction(name, List.of("FIELD1", "'value'", "FIELD2"), UNEVEN_ARGS)); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with no arguments after an OR boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithNoArgsAfterOR(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(OR)"); + expect(new InvalidIncludeExcludeArgsVisitor.InvalidFunction(name, List.of("OR"), NO_ARGS_AFTER_BOOLEAN)); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with no arguments after an AND boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithNoArgsAfterAND(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(AND)"); + expect(new InvalidIncludeExcludeArgsVisitor.InvalidFunction(name, List.of("AND"), NO_ARGS_AFTER_BOOLEAN)); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single argument after an OR boolean. 
+ * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithSingleArgsAfterOR(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(OR,'value')"); + expect(new InvalidIncludeExcludeArgsVisitor.InvalidFunction(name, List.of("OR", "'value'"), UNEVEN_ARGS_AFTER_BOOLEAN)); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single argument after an AND boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithSingleArgsAfterAND(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(AND,'value')"); + expect(new InvalidIncludeExcludeArgsVisitor.InvalidFunction(name, List.of("AND", "'value'"), UNEVEN_ARGS_AFTER_BOOLEAN)); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with an uneven number of arguments greater than one after an OR boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithUnevenArgsAfterOR(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(OR,FIELD1,'value',FIELD2)"); + expect(new InvalidIncludeExcludeArgsVisitor.InvalidFunction(name, List.of("OR", "FIELD1", "'value'", "FIELD2"), UNEVEN_ARGS_AFTER_BOOLEAN)); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with an uneven number of arguments greater than one after an AND boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithUnevenArgsAfterAND(String name) throws QueryNodeParseException { + givenQuery("#" + name + "(AND,FIELD1,'value',FIELD2)"); + expect(new InvalidIncludeExcludeArgsVisitor.InvalidFunction(name, List.of("AND", "FIELD1", "'value'", "FIELD2"), UNEVEN_ARGS_AFTER_BOOLEAN)); + assertResult(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void expect(InvalidIncludeExcludeArgsVisitor.InvalidFunction function) { + this.expected.add(function); + } + + private void assertResult() throws QueryNodeParseException { + QueryNode queryNode = parser.parse(query, ""); + List actual = InvalidIncludeExcludeArgsVisitor.check(queryNode); + // Compare the flagged functions against the expected invalid functions. + org.junit.jupiter.api.Assertions.assertEquals(expected, actual); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidQuoteVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidQuoteVisitorTest.java new file mode 100644 index 00000000000..100485ecdd8 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidQuoteVisitorTest.java @@ -0,0 +1,139 @@ +package datawave.query.lucene.visitors; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; + +public class InvalidQuoteVisitorTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + + private String query; + private
final List expected = new ArrayList<>(); + + @AfterEach + void tearDown() { + query = null; + expected.clear(); + } + + /** + * Test a query that does not contain any phrases with invalid quotes. + */ + @Test + void testQueryWithoutInvalidQuotes() throws QueryNodeParseException { + givenQuery("FOO:'abc' OR FOO:'def'"); + // Do not expect to find any phrases. + assertResult(); + } + + /** + * Test a query that contains phrases with invalid quotes at both ends. + */ + @Test + void testQueryWithInvalidQuotesAtBothEndsOfPhrases() throws QueryNodeParseException { + givenQuery("FOO:`abc` OR FOO:`def` OR FOO:'efg'"); + expect("FOO:`abc`"); + expect("FOO:`def`"); + assertResult(); + } + + /** + * Test a query that contains a phrase with an invalid quote at the start. + */ + @Test + void testQueryWithInvalidQuotesAtStartOPhrase() throws QueryNodeParseException { + givenQuery("FOO:`abc' OR FOO:'efg'"); + expect("FOO:`abc'"); + assertResult(); + } + + /** + * Test a query that contains the invalid quote within the phrase, but not at either end. + */ + @Test + void testQueryWithEmptyInvalidQuotedInMiddle() throws QueryNodeParseException { + givenQuery("FOO:'ab`cd' OR FOO:'efg'"); + // Do not expect to find any phrases. + assertResult(); + } + + /** + * Test a query that contains a phrase with an invalid quote at the end. + */ + @Test + void testQueryWithInvalidQuotesAtEndOPhrase() throws QueryNodeParseException { + givenQuery("FOO:'abc` OR FOO:'efg'"); + expect("FOO:'abc`"); + assertResult(); + } + + /** + * Test a query that contains a phrase with an empty phrase with invalid quotes. + */ + @Test + void testQueryWithEmptyInvalidQuotedPhrase() throws QueryNodeParseException { + givenQuery("FOO:`` OR FOO:'efg'"); + expect("FOO:``"); + assertResult(); + } + + /** + * Test a query that contains a phrase that is just one invalid quote. + */ + @Test + void testPhraseThatConsistsOfSingleInvalidQuote() throws QueryNodeParseException { + givenQuery("FOO:` OR FOO:'efg'"); + expect("FOO:`"); + assertResult(); + } + + /** + * Test a query that contains a phrase with an invalid quote inside a function. + */ + @Test + void testFunctionWithInvalidQuote() throws QueryNodeParseException { + givenQuery("FOO:'abc' AND #INCLUDE(BAR,`def`)"); + expect("#INCLUDE(BAR,`def`)"); + assertResult(); + } + + /** + * Test unfielded terms with invalid quotes. + */ + @Test + void testTermWithInvalidQuote() throws QueryNodeParseException { + givenQuery("`def` `abc`"); + expect("`def`"); + expect("`abc`"); + assertResult(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void expect(String phrase) throws QueryNodeParseException { + expected.add(parser.parse(phrase, "")); + } + + private void assertResult() throws QueryNodeParseException { + QueryNode node = parser.parse(query, ""); + List actual = InvalidQuoteVisitor.check(node); + List actualStrings = actual.stream().map(LuceneQueryStringBuildingVisitor::build).collect(Collectors.toList()); + List expectedStrings = expected.stream().map(LuceneQueryStringBuildingVisitor::build).collect(Collectors.toList()); + // Compare the query strings. 
+ Assertions.assertEquals(expectedStrings, actualStrings); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidSlopProximityVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidSlopProximityVisitorTest.java new file mode 100644 index 00000000000..b86dee32b74 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/InvalidSlopProximityVisitorTest.java @@ -0,0 +1,210 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; + +public class InvalidSlopProximityVisitorTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + + private QueryNode query; + + private final List expected = new ArrayList<>() {}; + + @After + public void tearDown() throws Exception { + query = null; + clearExpected(); + } + + /** + * Test a query without a slop value. + */ + @Test + public void testSlopAbsent() throws QueryNodeParseException { + givenQuery(parse("FIELD:abc")); + assertResults(); + } + + /** + * Test a query with a slop operator but no numeric value. + */ + @Test + public void testSlopNumberAbsent() throws QueryNodeParseException { + givenQuery(parse("FIELD:\"term1 term2 term3\"~")); + assertResults(); + } + + /** + * Test a proximity query with a single term and a slop value less than the minimum allowed. + */ + @Test + public void testSingleTermProximityLessThan() throws Exception { + QueryNode node = parse("FIELD:\"term1\"~0"); + givenQuery(node); + expect(new InvalidSlopProximityVisitor.InvalidSlop((SlopQueryNode) node, 1)); + assertResults(); + } + + /** + * Test a proximity query with multiple terms and a slop value less than the minimum allowed. + */ + @Test + public void testMultipleTermProximityLessThan() throws Exception { + QueryNode node = parse("FIELD:\"term1 term2\"~1"); + givenQuery(node); + expect(new InvalidSlopProximityVisitor.InvalidSlop((SlopQueryNode) node, 2)); + assertResults(); + } + + /** + * Test a proximity query with a single term and a slop value equal to the minimum allowed. + */ + @Test + public void testSingleTermProximityEqualTo() throws Exception { + givenQuery(parse("FIELD:\"term1\"~1")); + assertResults(); + } + + /** + * Test a proximity query with multiple terms and a slop value equal to the minimum allowed. + */ + @Test + public void testMultipleTermProximityEqualTo() throws Exception { + givenQuery(parse("FIELD:\"term1 term2\"~2")); + assertResults(); + } + + /** + * Test a proximity query with a single term and a slop value greater than the minimum allowed. + */ + @Test + public void testSingleTermProximityGreaterThan() throws Exception { + givenQuery(parse("FIELD:\"term1\"~2")); + assertResults(); + } + + /** + * Test a proximity query with multiple terms and a slop value greater than the minimum allowed. 
+ */ + @Test + public void testMultipleTermProximityGreaterThan() throws Exception { + givenQuery(parse("FIELD:\"term1 term2\"~3")); + assertResults(); + } + + /** + * Test a proximity query with padded white space on the left. + */ + @Test + public void testValidWithPaddedWhiteSpaceLeft() throws Exception { + givenQuery(parse("FIELD:\" term1 term2 term3\"~3")); + assertResults(); + } + + /** + * Test a proximity query with padded white space on the right. + */ + @Test + public void testValidWithPaddedWhiteSpaceRight() throws Exception { + givenQuery(parse("FIELD:\"term1 term2 term3 \"~3")); + assertResults(); + } + + /** + * Test a proximity query with padded white space between terms. + */ + @Test + public void testValidWithPaddedWhiteSpaceBetween() throws Exception { + givenQuery(parse("FIELD:\"term1 term2 term3\"~3")); + assertResults(); + } + + /** + * Test a proximity query with padded white space on both left and right sides. + */ + @Test + public void testValidWithPaddedWhiteSpaceLeftRight() throws Exception { + givenQuery(parse("FIELD:\" term1 term2 term3 \"~3")); + assertResults(); + } + + /** + * Test an invalid proximity query with padded white space on the left and insufficient slop value. + */ + @Test + public void testInvalidWithPaddedWhiteSpaceLeft() throws Exception { + QueryNode node = parse("FIELD:\" term1 term2 term3\"~2"); + givenQuery(node); + expect(new InvalidSlopProximityVisitor.InvalidSlop((SlopQueryNode) node, 3)); + assertResults(); + } + + /** + * Test an invalid proximity query with padded white space on the right and insufficient slop value. + */ + @Test + public void testInvalidWithPaddedWhiteSpaceRight() throws Exception { + QueryNode node = parse("FIELD:\"term1 term2 term3 \"~2"); + givenQuery(node); + expect(new InvalidSlopProximityVisitor.InvalidSlop((SlopQueryNode) node, 3)); + assertResults(); + } + + /** + * Test an invalid proximity query with padded white space between terms and insufficient slop value. + */ + @Test + public void testInvalidWithPaddedWhiteSpaceBetween() throws Exception { + QueryNode node = parse("FIELD:\"term1 term2 term3\"~2"); + givenQuery(node); + expect(new InvalidSlopProximityVisitor.InvalidSlop((SlopQueryNode) node, 3)); + assertResults(); + } + + /** + * Test an invalid proximity query with padded white space on both sides and insufficient slop value. 
+ */ + @Test + public void testInvalidWithPaddedWhiteSpaceLeftRight() throws Exception { + QueryNode node = parse("FIELD:\" term1 term2 term3 \"~2"); + givenQuery(node); + expect(new InvalidSlopProximityVisitor.InvalidSlop((SlopQueryNode) node, 3)); + assertResults(); + } + + private QueryNode parse(String query) throws QueryNodeParseException { + return parser.parse(query, ""); + } + + private void givenQuery(QueryNode query) { + this.query = query; + } + + private void expect(InvalidSlopProximityVisitor.InvalidSlop invalidSlop) { + this.expected.add(invalidSlop); + } + + private void clearExpected() { + this.expected.clear(); + } + + private void assertResults() throws QueryNodeParseException { + List actual = InvalidSlopProximityVisitor.check(query); + List actualStrings = actual.stream().map(Object::toString).collect(Collectors.toList()); + List expectedStrings = expected.stream().map(Object::toString).collect(Collectors.toList()); + Assert.assertEquals(expectedStrings, actualStrings); + clearExpected(); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/LuceneQueryStringBuildingVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/LuceneQueryStringBuildingVisitorTest.java new file mode 100644 index 00000000000..7602a9fdc4e --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/LuceneQueryStringBuildingVisitorTest.java @@ -0,0 +1,795 @@ +package datawave.query.lucene.visitors; + +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeException; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.AnyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.DeletedQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FunctionQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchAllDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.MatchNoDocsQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NoTokenFoundQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.NotBooleanQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OpaqueQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PathQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.PhraseSlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.ProximityQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; +import org.apache.lucene.queryparser.flexible.core.nodes.TokenizedPhraseQueryNode; +import
org.apache.lucene.queryparser.flexible.core.parser.EscapeQuerySyntax; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.apache.lucene.queryparser.flexible.core.util.UnescapedCharSequence; +import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig; +import org.apache.lucene.queryparser.flexible.standard.nodes.AbstractRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.BooleanModifierNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.MultiPhraseQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PointRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.SynonymQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode; +import org.junit.Assert; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; +import datawave.query.language.parser.lucene.EscapeQuerySyntaxImpl; + +public class LuceneQueryStringBuildingVisitorTest { + + private static final EscapeQuerySyntax escapeQuerySyntax = new EscapeQuerySyntaxImpl(); + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + + private QueryNode queryNode; + private String expectedQuery; + + @AfterEach + void tearDown() { + queryNode = null; + expectedQuery = null; + } + + /** + * Test a {@link FieldQueryNode} with a non-empty field. + */ + @Test + void testFieldQueryNodeWithField() throws QueryNodeParseException { + FieldQueryNode node = new FieldQueryNode("FIELD", "abc", 0, 3); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link FieldQueryNode} with an empty field. + */ + @Test + void testFieldQueryNodeWithoutField() throws QueryNodeParseException { + FieldQueryNode node = new FieldQueryNode("", "abc", 0, 3); + givenQueryNode(node); + + expect("abc"); + + assertResult(); + } + + /** + * Test a {@link QuotedFieldQueryNode} with a non-empty field. + */ + @Test + void testQuotedFieldQueryNodeWithField() throws QueryNodeParseException { + FieldQueryNode node = new QuotedFieldQueryNode("FIELD", "abc", 0, 3); + givenQueryNode(node); + + expect("FIELD:\"abc\""); + + assertResult(); + } + + /** + * Test a {@link QuotedFieldQueryNode} with an empty field. + */ + @Test + void testQuotedFieldQueryNodeWithoutField() throws QueryNodeParseException { + FieldQueryNode node = new QuotedFieldQueryNode("", "abc", 0, 3); + givenQueryNode(node); + + expect("\"abc\""); + + assertResult(); + } + + /** + * Test a {@link AndQueryNode} with no parent.
+ * + * @throws QueryNodeParseException + */ + @Test + void testAndQueryNodeWithoutParent() throws QueryNodeParseException { + ArrayList clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "abc", 0, 3)); + clauses.add(new FieldQueryNode("BAR", "def", 4, 7)); + AndQueryNode node = new AndQueryNode(clauses); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link AndQueryNode} with a group parent. + * + * @throws QueryNodeParseException + */ + @Test + void testAndQueryNodeWithGroupParent() throws QueryNodeParseException { + ArrayList clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "abc", 0, 3)); + clauses.add(new FieldQueryNode("BAR", "def", 4, 7)); + AndQueryNode node = new AndQueryNode(clauses); + GroupQueryNode groupNode = new GroupQueryNode(node); + + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link AndQueryNode} with a parent that is not a {@link GroupQueryNode}. + * + * @throws QueryNodeParseException + */ + @Test + void testAndQueryNodeWithNonGroupParent() throws QueryNodeParseException { + ArrayList clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "abc", 0, 3)); + clauses.add(new FieldQueryNode("BAR", "def", 4, 7)); + AndQueryNode node = new AndQueryNode(clauses); + OrQueryNode orNode = new OrQueryNode(List.of(node)); + + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link OrQueryNode} with no parent. + * + * @throws QueryNodeParseException + */ + @Test + void testOrQueryNodeWithoutParent() throws QueryNodeParseException { + ArrayList clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "abc", 0, 3)); + clauses.add(new FieldQueryNode("BAR", "def", 4, 7)); + OrQueryNode node = new OrQueryNode(clauses); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link OrQueryNode} with a group parent. + * + * @throws QueryNodeParseException + */ + @Test + void testOrQueryNodeWithGroupParent() throws QueryNodeParseException { + ArrayList clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "abc", 0, 3)); + clauses.add(new FieldQueryNode("BAR", "def", 4, 7)); + OrQueryNode node = new OrQueryNode(clauses); + GroupQueryNode groupNode = new GroupQueryNode(node); + + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link OrQueryNode} with a parent that is not a {@link GroupQueryNode}. + * + * @throws QueryNodeParseException + */ + @Test + void testOrQueryNodeWithNonGroupParent() throws QueryNodeParseException { + ArrayList clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "abc", 0, 3)); + clauses.add(new FieldQueryNode("BAR", "def", 4, 7)); + OrQueryNode node = new OrQueryNode(clauses); + AndQueryNode andNode = new AndQueryNode(List.of(node)); + + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link GroupQueryNode}. + * + * @throws QueryNodeParseException + */ + @Test + void testGroupQueryNode() throws QueryNodeParseException { + GroupQueryNode node = new GroupQueryNode(parse("FIELD1:abc")); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link BoostQueryNode}. 
+ */ + @Test + void testBoostQueryNode() throws QueryNodeParseException { + BoostQueryNode node = new BoostQueryNode(new FieldQueryNode("FOO", "abc", 0, 3), 2F); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link DeletedQueryNode}. + */ + @Test + void testDeletedQueryNode() throws QueryNodeParseException { + DeletedQueryNode node = new DeletedQueryNode(); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link DeletedQueryNode}. + */ + @Test + void testFuzzyQueryNodeWithField() throws QueryNodeParseException { + FuzzyQueryNode node = new FuzzyQueryNode("FOO", "abc", 2, 0, 3); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link DeletedQueryNode}. + */ + @Test + void testFuzzyQueryNodeWithoutField() throws QueryNodeParseException { + FuzzyQueryNode node = new FuzzyQueryNode("", "abc", 2, 0, 3); + givenQueryNode(node); + + expect("abc~2.0"); + + assertResult(); + } + + /** + * Test a {@link MatchAllDocsQueryNode}. + */ + @Test + void testMatchAllDocsQueryNode() throws QueryNodeParseException { + MatchAllDocsQueryNode node = new MatchAllDocsQueryNode(); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link MatchNoDocsQueryNode}. + */ + @Test + void testMatchNoDocsQueryNode() throws QueryNodeParseException { + MatchNoDocsQueryNode node = new MatchNoDocsQueryNode(); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link NoTokenFoundQueryNode}. + */ + @Test + void testNoTokenFoundQueryNode() throws QueryNodeParseException { + NoTokenFoundQueryNode node = new NoTokenFoundQueryNode(); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link OpaqueQueryNode}. + */ + @Test + void testOpaqueQueryNode() throws QueryNodeParseException { + OpaqueQueryNode node = new OpaqueQueryNode("wiki", "abc"); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link PathQueryNode}. + */ + @Test + void testPathQueryNode() throws QueryNodeParseException { + List elements = new ArrayList<>(); + elements.add(new PathQueryNode.QueryText("etc", 0, 3)); + elements.add(new PathQueryNode.QueryText("udev", 0, 3)); + elements.add(new PathQueryNode.QueryText("dev.conf", 0, 3)); + PathQueryNode node = new PathQueryNode(elements); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link PhraseSlopQueryNode}. + */ + @Test + void testPhraseSlopQueryNode() throws QueryNodeParseException { + PhraseSlopQueryNode node = new PhraseSlopQueryNode(new FieldQueryNode("FOO", "abc", 0, 3), 2); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link SlopQueryNode}. + */ + @Test + void testSlopQueryNode() throws QueryNodeParseException { + SlopQueryNode node = new SlopQueryNode(new FieldQueryNode("FOO", "abc", 0, 3), 2); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link TokenizedPhraseQueryNode}. 
+ */ + @Test + void testTokenizedPhraseQueryNode() throws QueryNodeParseException { + TokenizedPhraseQueryNode node = new TokenizedPhraseQueryNode(); + node.add(new FieldQueryNode("FOO", "abc", 0, 3)); + node.add(new FieldQueryNode("BAR", "def", 0, 3)); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link MultiPhraseQueryNode}. + */ + @Test + void testMultiPhraseQueryNode() throws QueryNodeParseException { + MultiPhraseQueryNode node = new MultiPhraseQueryNode(); + node.add(new FieldQueryNode("FOO", "abc", 0, 3)); + node.add(new FieldQueryNode("BAR", "def", 0, 3)); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link PointQueryNode} with a field. + */ + @Test + void testPointQueryNodeWithField() throws QueryNodeParseException { + PointQueryNode node = new PointQueryNode("FOO", 23, NumberFormat.getIntegerInstance()); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link PointQueryNode} with a blank field. + */ + @Test + void testPointQueryNodeWithoutField() throws QueryNodeParseException { + String value = NumberFormat.getCurrencyInstance(Locale.US).format(23); + PointQueryNode node = new PointQueryNode("", 23, NumberFormat.getCurrencyInstance(Locale.US)); + givenQueryNode(node); + + expect(value); + + assertResult(); + } + + /** + * Test a {@link PointRangeQueryNode} where both bounds are inclusive. + */ + @Test + void testPointRangeQueryNodeBothBoundsInclusive() throws QueryNodeException { + PointsConfig config = new PointsConfig(NumberFormat.getIntegerInstance(), Integer.class); + + PointQueryNode lowerBound = new PointQueryNode("FOO", 1, NumberFormat.getIntegerInstance()); + PointQueryNode upperBound = new PointQueryNode("FOO", 5, NumberFormat.getIntegerInstance()); + + PointRangeQueryNode rangeNode = new PointRangeQueryNode(lowerBound, upperBound, true, true, config); + givenQueryNode(rangeNode); + + expectExactMatchToQueryString(rangeNode); + + assertResult(); + } + + /** + * Test a {@link PointRangeQueryNode} where both bounds are exclusive. + */ + @Test + void testPointRangeQueryNodeBothBoundExclusive() throws QueryNodeException { + PointsConfig config = new PointsConfig(NumberFormat.getIntegerInstance(), Integer.class); + + PointQueryNode lowerBound = new PointQueryNode("FOO", 1, NumberFormat.getIntegerInstance()); + PointQueryNode upperBound = new PointQueryNode("FOO", 5, NumberFormat.getIntegerInstance()); + + PointRangeQueryNode rangeNode = new PointRangeQueryNode(lowerBound, upperBound, false, false, config); + givenQueryNode(rangeNode); + + expectExactMatchToQueryString(rangeNode); + + assertResult(); + } + + /** + * Test a {@link PointRangeQueryNode} where one bound is exclusive. + */ + @Test + void testPointRangeQueryNodeOneBoundExclusive() throws QueryNodeException { + PointsConfig config = new PointsConfig(NumberFormat.getIntegerInstance(), Integer.class); + + PointQueryNode lowerBound = new PointQueryNode("FOO", 1, NumberFormat.getIntegerInstance()); + PointQueryNode upperBound = new PointQueryNode("FOO", 5, NumberFormat.getIntegerInstance()); + + PointRangeQueryNode rangeNode = new PointRangeQueryNode(lowerBound, upperBound, false, true, config); + givenQueryNode(rangeNode); + + expectExactMatchToQueryString(rangeNode); + + assertResult(); + } + + /** + * Test a {@link PrefixWildcardQueryNode} with a non-blank field. 
+ */ + @Test + void testPrefixWildcardNodeWithField() throws QueryNodeParseException { + PrefixWildcardQueryNode node = new PrefixWildcardQueryNode("FOO", "ab*", 0, 3); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link PrefixWildcardQueryNode} with a blank field. + */ + @Test + void testPrefixWildcardNodeWithoutField() throws QueryNodeParseException { + PrefixWildcardQueryNode node = new PrefixWildcardQueryNode("", "ab*", 0, 3); + givenQueryNode(node); + + expect("ab*"); + + assertResult(); + } + + /** + * Test a {@link WildcardQueryNode} with a non-blank field. + */ + @Test + void testWildcardQueryNodeWithField() throws QueryNodeParseException { + WildcardQueryNode node = new WildcardQueryNode("FOO", "ab*", 0, 3); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link WildcardQueryNode} with a blank field. + */ + @Test + void testWildcardQueryNodeWithoutField() throws QueryNodeParseException { + WildcardQueryNode node = new WildcardQueryNode("", "ab*", 0, 3); + givenQueryNode(node); + + expect("ab*"); + + assertResult(); + } + + /** + * Test a {@link RegexpQueryNode} with a non-blank field. + */ + @Test + void testRegexpQueryNodeWithField() throws QueryNodeParseException { + RegexpQueryNode node = new RegexpQueryNode("FOO", "ab*", 0, 3); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link RegexpQueryNode} with a blank field. + */ + @Test + void testRegexpQueryNodeWithoutField() throws QueryNodeParseException { + RegexpQueryNode node = new RegexpQueryNode("", "ab*", 0, 3); + givenQueryNode(node); + + expect("/ab*/"); + + assertResult(); + } + + /** + * Test a {@link FunctionQueryNode}. + */ + @Test + void testFunctionQueryNode() throws QueryNodeParseException { + FunctionQueryNode node = (FunctionQueryNode) parse("#INCLUDE(FIELD, reg\\,ex)"); + givenQueryNode(node); + + expect("#INCLUDE(FIELD, reg,ex)"); + + assertResult(); + } + + /** + * Test a {@link NotBooleanQueryNode} that has a single clause. + */ + @Test + void testNotBooleanQueryNode() throws QueryNodeParseException { + QueryNode node = parse("FOO:abc NOT BAR:def"); + givenQueryNode(node); + + expect("FOO:abc NOT BAR:def"); + + assertResult(); + } + + /** + * Test a {@link NotBooleanQueryNode} that has multiple clauses. + */ + @Test + void testNotBooleanQueryNodeWithMultipleClauses() throws QueryNodeParseException { + QueryNode node = parse("FOO:abc BAR:abc NOT HAT:bbb HEY:whomai"); + givenQueryNode(node); + + expectExactMatchToQueryString(queryNode); + + assertResult(); + } + + /** + * Test an {@link AnyQueryNode} with a field. + */ + @Test + void testAnyQueryNodeWithField() throws QueryNodeParseException { + List clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "abc", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "def", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "efg", 0, 3)); + AnyQueryNode node = new AnyQueryNode(clauses, "FOO", 2); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test an {@link AnyQueryNode} without a field. 
+ */ + @Test + void testAnyQueryNodeWithoutField() throws QueryNodeParseException { + List clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "abc", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "def", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "efg", 0, 3)); + AnyQueryNode node = new AnyQueryNode(clauses, "", 2); + givenQueryNode(node); + + expect("( abc def efg ) ANY 2"); + + assertResult(); + } + + /** + * Test a {@link ProximityQueryNode} with a field. + */ + @Test + void testProximityQueryNodeWithField() throws QueryNodeParseException { + List clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "1", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "2", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "3", 0, 3)); + ProximityQueryNode node = new ProximityQueryNode(clauses, "FOO", ProximityQueryNode.Type.NUMBER, 2, true); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link ProximityQueryNode} without a field. + */ + @Test + void testProximityQueryNodeWithoutField() throws QueryNodeParseException { + List clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "1", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "2", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "3", 0, 3)); + ProximityQueryNode node = new ProximityQueryNode(clauses, "", ProximityQueryNode.Type.NUMBER, 2, true); + givenQueryNode(node); + + expect("( 1 2 3 ) WITHIN 2 INORDER"); + + assertResult(); + } + + /** + * Test a {@link TermRangeQueryNode} where both bounds are inclusive. + */ + @Test + void testTermRangeQueryNodeBothBoundsInclusive() throws QueryNodeException { + FieldQueryNode lowerBound = new FieldQueryNode("FOO", "aaa", 0, 3); + FieldQueryNode upperBound = new FieldQueryNode("FOO", "zzz", 0, 3); + + TermRangeQueryNode rangeNode = new TermRangeQueryNode(lowerBound, upperBound, true, true); + givenQueryNode(rangeNode); + + expectExactMatchToQueryString(rangeNode); + + assertResult(); + } + + /** + * Test a {@link TermRangeQueryNode} where both bounds are exclusive. + */ + @Test + void testTermRangeQueryNodeBothBoundExclusive() throws QueryNodeException { + FieldQueryNode lowerBound = new FieldQueryNode("FOO", "aaa", 0, 3); + FieldQueryNode upperBound = new FieldQueryNode("FOO", "zzz", 0, 3); + + TermRangeQueryNode rangeNode = new TermRangeQueryNode(lowerBound, upperBound, false, false); + givenQueryNode(rangeNode); + + expectExactMatchToQueryString(rangeNode); + + assertResult(); + } + + /** + * Test a {@link TermRangeQueryNode} where one bound is exclusive. + */ + @Test + void testTermRangeQueryNodeOneBoundExclusive() throws QueryNodeException { + FieldQueryNode lowerBound = new FieldQueryNode("FOO", "aaa", 0, 3); + FieldQueryNode upperBound = new FieldQueryNode("FOO", "zzz", 0, 3); + + TermRangeQueryNode rangeNode = new TermRangeQueryNode(lowerBound, upperBound, false, true); + givenQueryNode(rangeNode); + + expectExactMatchToQueryString(rangeNode); + + assertResult(); + } + + /** + * Test a {@link SynonymQueryNode}. + */ + @Test + void testSynonymQueryNode() throws QueryNodeParseException { + List clauses = new ArrayList<>(); + clauses.add(new FieldQueryNode("FOO", "1", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "2", 0, 3)); + clauses.add(new FieldQueryNode("FOO", "3", 0, 3)); + SynonymQueryNode node = new SynonymQueryNode(clauses); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + /** + * Test a {@link BooleanModifierNode}. 
+ */ + @Test + void testBooleanModifierNode() throws QueryNodeParseException { + BooleanModifierNode node = new BooleanModifierNode(new FieldQueryNode("FOO", "abc", 0, 3), ModifierQueryNode.Modifier.MOD_REQ); + givenQueryNode(node); + + expectExactMatchToQueryString(node); + + assertResult(); + } + + private void givenQueryNode(QueryNode node) { + this.queryNode = node; + } + + private void expect(String query) { + this.expectedQuery = query; + } + + private void expectExactMatchToQueryString(QueryNode queryNode) throws QueryNodeParseException { + expect(queryNode.toQueryString(escapeQuerySyntax).toString()); + } + + private void assertResult() throws QueryNodeParseException { + String actual = LuceneQueryStringBuildingVisitor.build(queryNode); + Assert.assertEquals(expectedQuery, actual); + } + + private QueryNode parse(String query) throws QueryNodeParseException { + return parser.parse(query, ""); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/UnescapedWildcardsInQuotedPhrasesVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/UnescapedWildcardsInQuotedPhrasesVisitorTest.java new file mode 100644 index 00000000000..51bcc5e66fd --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/UnescapedWildcardsInQuotedPhrasesVisitorTest.java @@ -0,0 +1,96 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; + +class UnescapedWildcardsInQuotedPhrasesVisitorTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + + private String query; + private final List> expectedPhrases = new ArrayList<>(); + + @AfterEach + void tearDown() { + query = null; + expectedPhrases.clear(); + } + + /** + * Test a query with a quoted phrase without wildcards. + */ + @Test + void testQuotedPhraseWithoutWildcards() throws QueryNodeParseException { + givenQuery("FOO:\"abc\""); + // Do not expect any phrases. + assertResult(); + } + + /** + * Test a query with an quoted phrase an escaped wildcard. + */ + @Test + void testQuotedPhraseWithEscapedWildcard() throws QueryNodeParseException { + // Backslash must be escaped here for it to remain in parsed query. + givenQuery("FOO:\"a\\\\*bc\""); + // Do not expect any phrases. + assertResult(); + } + + /** + * Test a query with quoted phrases with a non-escaped wildcard at the beginning, in the middle, and at the end of the phrase. + */ + @Test + void testQuotedPhraseWithUnescapedWildcard() throws QueryNodeParseException { + givenQuery("FOO:\"*abc\" OR FOO:\"de*f\" OR FOO:\"efg*\""); + expectFieldedPhrase("FOO", "*abc"); + expectFieldedPhrase("FOO", "de*f"); + expectFieldedPhrase("FOO", "efg*"); + assertResult(); + } + + /** + * Test a query with an unfielded quoted phrases with a non-escaped wildcard. 
+ */ + @Test + void testUnfieldedQuotedPhraseWithUnescapedWildcard() throws QueryNodeParseException { + givenQuery("\"*abc\""); + expectUnfieldedPhrase("*abc"); + assertResult(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void expectUnfieldedPhrase(String phrase) { + this.expectedPhrases.add(Pair.of("", phrase)); + } + + private void expectFieldedPhrase(String field, String phrase) { + this.expectedPhrases.add(Pair.of(field, phrase)); + } + + private void assertResult() throws QueryNodeParseException { + QueryNode node = parser.parse(query, ""); + PrintingVisitor.printToStdOut(node); + // @formatter:off + List> actual = UnescapedWildcardsInQuotedPhrasesVisitor.check(node).stream() + .map(result -> Pair.of(result.getFieldAsString(), result.getTextAsString())) + .collect(Collectors.toList()); + // @formatter:on + + Assertions.assertEquals(expectedPhrases, actual); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/UnfieldedTermsVisitorTest.java b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/UnfieldedTermsVisitorTest.java new file mode 100644 index 00000000000..5b27d8b63dd --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/lucene/visitors/UnfieldedTermsVisitorTest.java @@ -0,0 +1,71 @@ +package datawave.query.lucene.visitors; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; + +class UnfieldedTermsVisitorTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + + private String query; + private final List expectedTerms = new ArrayList<>(); + + @AfterEach + void tearDown() { + query = null; + expectedTerms.clear(); + } + + /** + * Test a query without unfielded terms. + */ + @Test + void testQueryWithoutUnfieldedTerms() throws QueryNodeParseException { + givenQuery("FOO:123 OR BAR:654"); + // Do not expect any terms. + assertResult(); + } + + /** + * Test a query with unfielded terms. + */ + @Test + void testQueryWithUnfieldedTerms() throws QueryNodeParseException { + givenQuery("FOO:123 643 OR abc 'bef'"); + expectTerms("643", "abc", "'bef'"); + assertResult(); + } + + /** + * Test that grouped terms directly after a field are not flagged. + */ + @Test + void testGroupedFieldTerms() throws QueryNodeParseException { + givenQuery("643 OR FIELD:(123 OR 456)"); + expectTerms("643"); + assertResult(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void expectTerms(String... 
terms) { + expectedTerms.addAll(List.of(terms)); + } + + private void assertResult() throws QueryNodeParseException { + QueryNode node = parser.parse(query, ""); + List actual = UnfieldedTermsVisitor.check(node); + Assertions.assertEquals(expectedTerms, actual); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousNotRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousNotRuleTest.java new file mode 100644 index 00000000000..6a37586abfe --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousNotRuleTest.java @@ -0,0 +1,125 @@ +package datawave.query.rules; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +class AmbiguousNotRuleTest extends ShardQueryRuleTest { + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + /** + * Test a query that does not contain a NOT. + */ + @Test + void testQueryWithoutNOT() throws Exception { + givenQuery("FOO:123"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a NOT with a single unwrapped term preceding the NOT. + */ + @Test + void testNOTWithSingleUnwrappedPrecedingTerms() throws Exception { + givenQuery("FIELD1:abc NOT FIELD:def"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a NOT with a single wrapped term preceding the NOT. + */ + @Test + void testNOTWithSingleWrappedPrecedingTerms() throws Exception { + givenQuery("(FIELD1:abc) NOT FIELD:def"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a NOT with multiple wrapped terms preceding the NOT. + */ + @ParameterizedTest() + @ValueSource(strings = {"OR", "AND"}) + void testNOTWithWrappedMultiplePrecedingTerms(String junction) throws Exception { + givenQuery("(FIELD1:abc " + junction + " FIELD2:def) NOT FIELD:ghi"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a NOT with multiple unwrapped terms preceding the NOT. + */ + @ParameterizedTest() + @ValueSource(strings = {"OR", "AND"}) + void testNOTWithUnwrappedMultiplePrecedingTerms(String junction) throws Exception { + givenQuery("FIELD1:abc " + junction + " FIELD2:def NOT FIELD:ghi"); + + expectMessage("Ambiguous usage of NOT detected with multiple unwrapped preceding terms: \"FIELD1:abc " + junction + + " FIELD2:def NOT\" should be \"(FIELD1:abc " + junction + " FIELD2:def) NOT\"."); + + assertResult(); + } + + /** + * Test a query with a NOT with multiple unwrapped terms preceding the NOT that will be automatically ANDed. + * + * @throws Exception + */ + @Test + void testNOTWithUnwrappedAutomaticallyAndedPreceedingTerms() throws Exception { + givenQuery("FIELD1:abc FIELD2:def NOT FIELD:ghi"); + + expectMessage("Ambiguous usage of NOT detected with multiple unwrapped preceding terms: \"FIELD1:abc AND FIELD2:def NOT\" should be " + + "\"(FIELD1:abc AND FIELD2:def) NOT\"."); + + assertResult(); + } + + /** + * Test a query with a NOT with multiple wrapped terms preceding the NOT that will be automatically ANDed. + * + * @throws Exception + */ + @Test + void testNOTWithWrappedAutomaticallyAndedPreceedingTerms() throws Exception { + givenQuery("(FIELD1:abc FIELD2:def) NOT FIELD:ghi"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query that does not consist entirely of a NOT. 
+ */ + @Test + void testQueryWithTermThatIsNotPartOfNOT() throws Exception { + givenQuery("FIELD1:abc OR (FIELD2:abc FIELD3:def NOT FIELD4:ghi)"); + + expectMessage("Ambiguous usage of NOT detected with multiple unwrapped preceding terms: \"FIELD2:abc AND FIELD3:def NOT\" should be " + + "\"(FIELD2:abc AND FIELD3:def) NOT\"."); + + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToLucene(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new AmbiguousNotRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousOrPhrasesRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousOrPhrasesRuleTest.java new file mode 100644 index 00000000000..d9037e46e78 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousOrPhrasesRuleTest.java @@ -0,0 +1,187 @@ +package datawave.query.rules; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class AmbiguousOrPhrasesRuleTest extends ShardQueryRuleTest { + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + /** + * Test a basic query with no ambiguous phrases. + */ + @Test + void testQueryWithoutAmbiguousPhrase() throws Exception { + givenQuery("FOO:abc"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with wrapped fielded phrases. + */ + @Test + void testQueryWithWrappedPhrase() throws Exception { + givenQuery("FOO:(abc OR def)"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with quoted phrases. + */ + @Test + void testQueryWithQuotedPhrases() throws Exception { + givenQuery("FOO:\"abc\" OR \"def\""); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with nested wrapped fielded phrases. + */ + @Test + void testQueryWithNestedWrappedPhrase() throws Exception { + givenQuery("FOO:(((abc OR def)))"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with a single fielded phrase that is wrapped. + */ + @Test + void testQueryWithWrappedSingleFieldedPhrase() throws Exception { + givenQuery("(FOO:abc)"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query where ambiguous phrases are separated by an AND. + */ + @Test + void testAndedPhrase() throws Exception { + givenQuery("FOO:abc AND def"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with multiple grouped and ORed phrases. + */ + @Test + void testValidPhraseWithMultipleGroupedOrs() throws Exception { + givenQuery("(FOO:(abc OR def)) OR ((BAR:efg AND HAT:(aaa OR bbb OR ccc))) AND #INCLUDE(FOO,'abc*')"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with unfielded ambiguous ORed phrases. + */ + @Test + void testQueryWithUnfieldedAmbiguousPhraseOnly() throws Exception { + givenQuery("abc OR def OR efg"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with ambiguous ORed phrases. + */ + @Test + void testQueryWithAmbiguousPhrase() throws Exception { + givenQuery("FOO:abc OR def OR efg"); + + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: FOO:abc OR def OR efg Recommended: FOO:(abc OR def OR efg)"); + + assertResult(); + } + + /** + * Test a query with ambiguous ORed phrases that are wrapped outside the fielded term. 
+ */ + @Test + void testWrappedQueryWithAmbiguousPhrase() throws Exception { + givenQuery("(FOO:abc OR def OR efg)"); + + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: ( FOO:abc OR def OR efg ) Recommended: FOO:(abc OR def OR efg)"); + + assertResult(); + } + + /** + * Test a query with nested ambiguous ORed phrases that could be flattened. + */ + @Test + void testQueryWithNestedAmbiguousPhrases() throws Exception { + givenQuery("(FOO:abc OR (def OR efg))"); + + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: ( FOO:abc OR ( def OR efg ) ) Recommended: FOO:(abc OR def OR efg)"); + + assertResult(); + } + + /** + * Test a query with a variety of ambiguous ORed phrases, some of which should be flagged. + */ + @Test + void testQueryWithMultipleAmbiguousPhrases() throws Exception { + givenQuery("FOO:aaa AND bbb AND (BAR:aaa OR bbb OR ccc OR HAT:\"ear\" nose) OR (aaa OR bbb OR VEE:eee OR 123 OR (gee OR \"wiz\")) AND (EGG:yolk OR shell)"); + + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: BAR:aaa OR bbb OR ccc Recommended: BAR:(aaa OR bbb OR ccc)"); + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: VEE:eee OR 123 Recommended: VEE:(eee OR 123)"); + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: ( EGG:yolk OR shell ) Recommended: EGG:(yolk OR shell)"); + + assertResult(); + } + + /** + * Test a query with nested wrapped fielded phrases. + */ + @Test + void testQueryWithAmbiguousPhraseInSeparateGroups() throws Exception { + givenQuery("((FOO:abc OR def) OR (aaa OR bbb))"); + + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: ( FOO:abc OR def ) Recommended: FOO:(abc OR def)"); + + assertResult(); + } + + /** + * Test a query with consecutive groupings of ambiguous phrases. + */ + @Test + void testQueryWithConsecutiveAmbiguousPhrases() throws Exception { + givenQuery("FOO:abc OR def OR BAR:aaa OR bbb"); + + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: FOO:abc OR def Recommended: FOO:(abc OR def)"); + expectMessage("Ambiguous unfielded terms OR'd with fielded term detected: BAR:aaa OR bbb Recommended: BAR:(aaa OR bbb)"); + + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToLucene(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new AmbiguousOrPhrasesRule(ruleName); + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousUnquotedPhrasesRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousUnquotedPhrasesRuleTest.java new file mode 100644 index 00000000000..56215f39d77 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/AmbiguousUnquotedPhrasesRuleTest.java @@ -0,0 +1,118 @@ +package datawave.query.rules; + +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class AmbiguousUnquotedPhrasesRuleTest extends ShardQueryRuleTest { + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + /** + * Test query without ambiguous phrases. + */ + @Test + void testQueryWithoutAmbiguousPhrases() throws Exception { + givenQuery("FOO:\"123 456\" OR FOO:bef"); + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with ambiguous phrases after an unquoted fielded term. 
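+ * For example, {@code FOO:abc def ghi} should be flagged with the recommendation {@code FOO:"abc def ghi"}.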
+ */ + @Test + void testAmbiguousPhraseAfterFieldedTerm() throws Exception { + givenQuery("FOO:abc def ghi"); + expectMessage("Ambiguous unfielded terms AND'd with fielded term detected: FOO:abc AND def AND ghi. Recommended: FOO:\"abc def ghi\""); + assertResult(); + } + + /** + * Test a query with ambiguous phrases after a quoted phrase. + */ + @Test + void testAmbiguousPhraseAfterQuotedFieldedTerm() throws Exception { + givenQuery("FOO:\"abc\" def ghi"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with ambiguous phrases before a fielded term. + */ + @Test + void testAmbiguousPhraseBeforeFieldedTerm() throws Exception { + givenQuery("abc def FOO:ghi"); + + // Do not expect any results. + assertResult(); + } + + /** + * Test a query with ambiguous phrases before a fielded term. + */ + @Test + void testMultipleFieldsWithAmbiguousPhrases() throws Exception { + givenQuery("FOO:abc def ghi OR BAR:aaa bbb ccc AND 333 HAT:\"111\" 222 AND HEN:car VEE:elephant zebra VEE:deer"); + + expectMessage("Ambiguous unfielded terms AND'd with fielded term detected: FOO:abc AND def AND ghi. Recommended: FOO:\"abc def ghi\""); + expectMessage("Ambiguous unfielded terms AND'd with fielded term detected: BAR:aaa AND bbb AND ccc. Recommended: BAR:\"aaa bbb ccc\""); + expectMessage("Ambiguous unfielded terms AND'd with fielded term detected: VEE:elephant AND zebra. Recommended: VEE:\"elephant zebra\""); + + assertResult(); + } + + /** + * Test a query with grouped ambiguous terms following a fielded term. + */ + @Test + void testGroupedAmbiguousPhrasesAfterFieldedTerm() throws Exception { + givenQuery("FOO:abc (def ghi)"); + + expectMessage("Ambiguous unfielded terms AND'd with fielded term detected: FOO:abc AND ( def AND ghi ). Recommended: FOO:\"abc def ghi\""); + + assertResult(); + } + + /** + * Test a query with nested grouped ambiguous terms following a fielded term. + */ + @Test + void testNestedGroupedAmbiguousPhrasesAfterFieldedTerm() throws Exception { + givenQuery("FOO:abc (def ghi (jkl))"); + + expectMessage("Ambiguous unfielded terms AND'd with fielded term detected: FOO:abc AND ( def AND ghi AND ( jkl ) ). Recommended: FOO:\"abc def ghi jkl\""); + + assertResult(); + } + + /** + * Test a query with ambiguous terms that are explicitly ANDed with a preceding fielded term. + * + * @throws QueryNodeParseException + */ + @Test + void testAmbiguousPhrasesAfterExplicitANDWithFieldedTerm() throws Exception { + givenQuery("FOO:abc AND def AND ghi"); + + expectMessage("Ambiguous unfielded terms AND'd with fielded term detected: FOO:abc AND def AND ghi. 
Recommended: FOO:\"abc def ghi\""); + + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToLucene(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new AmbiguousUnquotedPhrasesRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/FieldExistenceRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/FieldExistenceRuleTest.java new file mode 100644 index 00000000000..cd14cfe6106 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/FieldExistenceRuleTest.java @@ -0,0 +1,98 @@ +package datawave.query.rules; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.easymock.EasyMock; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import datawave.query.util.MetadataHelper; +import datawave.query.util.MockMetadataHelper; + +public class FieldExistenceRuleTest extends ShardQueryRuleTest { + + private static final Set ALL_FIELDS = Set.of("FOO", "BAR", "BAT"); + private static final String ANYFIELD = "_ANYFIELD_"; + private static final MockMetadataHelper defaultMetadataHelper = new MockMetadataHelper(); + + private final Set fieldExceptions = new HashSet<>(); + + @BeforeAll + public static void beforeClass() throws Exception { + defaultMetadataHelper.addFields(ALL_FIELDS); + } + + @BeforeEach + public void setUp() throws Exception { + givenRuleName(RULE_NAME); + givenMetadataHelper(defaultMetadataHelper); + expectRuleName(RULE_NAME); + } + + /** + * Test a query where all fields exist. + */ + @Test + public void testAllFieldsExist() throws Exception { + givenQuery("FOO == 'abc' || BAR =~ 'abc' || filter:includeRegex(BAT, '45*')"); + + assertResult(); + } + + /** + * Test a query where some fields do not exist. + */ + @Test + public void testNonExistentFields() throws Exception { + givenQuery("TOMFOOLERY == 'abc' || CHAOS =~ 'abc' || filter:includeRegex(SHENANIGANS, '45.8') || FOO == 'aa'"); + expectMessage("Fields not found in data dictionary: TOMFOOLERY, CHAOS, SHENANIGANS"); + assertResult(); + } + + /** + * Test a query that has a non-existent field that is a special field. + */ + @Test + public void testSpecialField() throws Exception { + givenQuery("FOO == 'abc' || TOMFOOLERY == 'abc' || _ANYFIELD_ = 'abc'"); + givenFieldException(ANYFIELD); + expectMessage("Fields not found in data dictionary: TOMFOOLERY"); + assertResult(); + } + + /** + * Test a scenario where an exception gets thrown by the metadata helper. 
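+ * The thrown exception should be captured in the rule result rather than propagated.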
+ */ + @Test + public void testExceptionThrown() throws Exception { + MetadataHelper mockHelper = EasyMock.createMock(MetadataHelper.class); + Exception exception = new IllegalArgumentException("Failed to fetch all fields"); + EasyMock.expect(mockHelper.getAllFields(Collections.emptySet())).andThrow(exception); + EasyMock.replay(mockHelper); + + givenQuery("FOO == 'abc'"); + givenMetadataHelper(mockHelper); + + expectException(exception); + assertResult(); + } + + private void givenFieldException(String exception) { + this.fieldExceptions.add(exception); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToJexl(); + } + + @Override + protected ShardQueryRule getNewRule() { + FieldExistenceRule rule = new FieldExistenceRule(ruleName); + rule.setSpecialFields(fieldExceptions); + return rule; + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/FieldPatternPresenceRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/FieldPatternPresenceRuleTest.java new file mode 100644 index 00000000000..c5aad1b6c5d --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/FieldPatternPresenceRuleTest.java @@ -0,0 +1,82 @@ +package datawave.query.rules; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.easymock.EasyMock; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import datawave.query.jexl.JexlASTHelper; +import datawave.query.util.MetadataHelper; +import datawave.query.util.MockMetadataHelper; + +public class FieldPatternPresenceRuleTest extends ShardQueryRuleTest { + + private static final MockMetadataHelper defaultMetadataHelper = new MockMetadataHelper(); + + // @formatter:off + private static final Map fieldMessages = Map.of( + "FOO", "Field FOO is restricted.", + "TOMFOOLERY", "No tomfoolery allowed.", + "_ANYFIELD_", "Unfielded term _ANYFIELD_ present." + ); + // @formatter:on + + // @formatter:off + private static final Map patternMessages = Map.of( + ".*", "Pattern too expansive.", + "(^_^)", "Pattern looks like a face." + ); + // @formatter:on + + @BeforeEach + public void setUp() throws Exception { + givenRuleName(RULE_NAME); + givenMetadataHelper(defaultMetadataHelper); + expectRuleName(RULE_NAME); + } + + /** + * Test a query with no matching fields or patterns. + */ + @Test + public void testNoMatchesFound() throws Exception { + givenQuery("BAR == 'abc' || BAR =~'abc'"); + + // Do not expect any messages. + assertResult(); + } + + /** + * Test a query where matching fields and patterns are found. 
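+ * Each configured field and pattern present in the query should contribute its corresponding message.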
+ */ + @Test + public void testMatchesFound() throws Exception { + givenQuery("TOMFOOLERY == 'abc' && _ANYFIELD_ =~ '.*' && FOO =~ '(^_^)' && HAT == 'def'"); + expectMessage("Field FOO is restricted."); + expectMessage("No tomfoolery allowed."); + expectMessage("Unfielded term _ANYFIELD_ present."); + expectMessage("Pattern too expansive."); + expectMessage("Pattern looks like a face."); + + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToJexl(); + } + + @Override + protected ShardQueryRule getNewRule() { + FieldPatternPresenceRule rule = new FieldPatternPresenceRule(ruleName); + rule.setFieldMessages(fieldMessages); + rule.setPatternMessages(patternMessages); + return rule; + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/IncludeExcludeArgsRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/IncludeExcludeArgsRuleTest.java new file mode 100644 index 00000000000..66363c32f38 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/IncludeExcludeArgsRuleTest.java @@ -0,0 +1,251 @@ +package datawave.query.rules; + +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.easymock.EasyMock; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; + +class IncludeExcludeArgsRuleTest extends ShardQueryRuleTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + /** + * Test a query that does not have the INCLUDE or EXCLUDE function. + */ + @Test + public void testQueryWithNoIncludeOrExcludeFunction() throws Exception { + givenQuery("FOO:'abc'"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single field and value. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithSingleFieldAndValue(String name) throws Exception { + givenQuery("#" + name + "(FOO,'abc')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single field and value. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithMultipleFieldAndValue(String name) throws Exception { + givenQuery("#" + name + "(FOO,'abc',BAR,'def')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single field and value after an OR boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithSingleFieldAndValueAfterOR(String name) throws Exception { + givenQuery("#" + name + "(OR,FOO,'abc')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single field and value after an AND boolean. 
+ * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithSingleFieldAndValueAfterAND(String name) throws Exception { + givenQuery("#" + name + "(AND,FOO,'abc')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with multiple fields and values after an OR boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithMultipleFieldsAndValuesAfterOR(String name) throws Exception { + givenQuery("#" + name + "(OR,FOO,'abc',BAR,'def')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with multiple fields and values after an AND boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + public void testFunctionWithMultipleFieldsAndValuesAfterAND(String name) throws Exception { + givenQuery("#" + name + "(AND,FOO,'abc',BAR,'def')"); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with just a single arg. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithSingleArg(String name) throws Exception { + givenQuery("#" + name + "(FIELD)"); + expectMessage("Function #" + name + " supplied with uneven number of arguments. Must supply field/value pairs, e.g. #" + name + "(FIELD, 'value') or " + + "#" + name + "(FIELD1, 'value1', FIELD2, 'value2')."); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with an uneven number of arguments greater than one without a boolean arg. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithUnevenArgs(String name) throws Exception { + givenQuery("#" + name + "(FIELD1,'value',FIELD2)"); + expectMessage("Function #" + name + " supplied with uneven number of arguments. Must supply field/value pairs, e.g. #" + name + "(FIELD, 'value') or " + + "#" + name + "(FIELD1, 'value1', FIELD2, 'value2')."); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with no arguments after an OR boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithNoArgsAfterOR(String name) throws Exception { + givenQuery("#" + name + "(OR)"); + expectMessage("Function #" + name + + " supplied with no arguments after the first boolean arg OR. Must supply at least a field and value after the first " + + "boolean arg, e.g. #" + name + "(OR, FIELD, 'value')."); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with no arguments after an AND boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithNoArgsAfterAND(String name) throws Exception { + givenQuery("#" + name + "(AND)"); + expectMessage("Function #" + name + " supplied with no arguments after the first boolean arg AND. Must supply at least a field and value after the " + + "first boolean arg, e.g. #" + name + "(AND, FIELD, 'value')."); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single argument after an OR boolean. 
+ * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithSingleArgsAfterOR(String name) throws Exception { + givenQuery("#" + name + "(OR,'value')"); + expectMessage("Function #" + name + " supplied with uneven number of arguments after the first boolean arg OR. Must supply field/value after the " + + "boolean, e.g. #" + name + "(OR, FIELD, 'value') or #" + name + "(OR, FIELD1, 'value1',' FIELD2, 'value2')."); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with a single argument after an AND boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithSingleArgsAfterAND(String name) throws Exception { + givenQuery("#" + name + "(AND,'value')"); + expectMessage("Function #" + name + " supplied with uneven number of arguments after the first boolean arg AND. Must supply field/value after the " + + "boolean, e.g. #" + name + "(AND, FIELD, 'value') or #" + name + "(AND, FIELD1, 'value1',' FIELD2, 'value2')."); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with an uneven number of arguments greater than one after an OR boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithUnevenArgsAfterOR(String name) throws Exception { + givenQuery("#" + name + "(OR,FIELD1,'value',FIELD2)"); + expectMessage("Function #" + name + " supplied with uneven number of arguments after the first boolean arg OR. Must supply field/value after the " + + "boolean, e.g. #" + name + "(OR, FIELD, 'value') or #" + name + "(OR, FIELD1, 'value1',' FIELD2, 'value2')."); + assertResult(); + } + + /** + * Test versions of the INCLUDE and EXCLUDE functions with an uneven number of arguments greater than one after an AND boolean. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"INCLUDE", "EXCLUDE"}) + void testFunctionWithUnevenArgsAfterAND(String name) throws Exception { + givenQuery("#" + name + "(AND,FIELD1,'value',FIELD2)"); + expectMessage("Function #" + name + " supplied with uneven number of arguments after the first boolean arg AND. Must supply field/value after the " + + "boolean, e.g. #" + name + "(AND, FIELD, 'value') or #" + name + "(AND, FIELD1, 'value1',' FIELD2, 'value2')."); + assertResult(); + } + + /** + * Verify that when an exception is thrown, it is captured in the result. 
+ */ + @Test + void testExceptionThrown() { + ShardQueryValidationConfiguration configuration = EasyMock.mock(ShardQueryValidationConfiguration.class); + Exception exception = new IllegalArgumentException("Failed to get query"); + EasyMock.expect(configuration.getParsedQuery()).andThrow(exception); + EasyMock.replay(configuration); + + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToLucene(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new IncludeExcludeArgsRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/IncludeExcludeIndexFieldsRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/IncludeExcludeIndexFieldsRuleTest.java new file mode 100644 index 00000000000..58fe92e1130 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/IncludeExcludeIndexFieldsRuleTest.java @@ -0,0 +1,106 @@ +package datawave.query.rules; + +import java.util.Set; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import datawave.query.util.MockMetadataHelper; + +class IncludeExcludeIndexFieldsRuleTest extends ShardQueryRuleTest { + + private static final MockMetadataHelper metadataHelper = new MockMetadataHelper(); + + @BeforeAll + static void beforeAll() { + metadataHelper.setIndexedFields(Set.of("INDEXED1", "INDEXED2")); + } + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + givenMetadataHelper(metadataHelper); + expectRuleName(RULE_NAME); + } + + /** + * Test a query without functions. + */ + @Test + void testQueryWithoutFunctions() throws Exception { + givenQuery("FOO == 'abc'"); + + // Do not expect any messages. + assertResult(); + } + + /** + * Test versions of the includeRegex and excludeRegex functions without indexed fields. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"includeRegex", "excludeRegex"}) + void testFunctionWithoutIndexedField(String name) throws Exception { + givenQuery("filter:" + name + "(FOO,'value')"); + + // Do not expect any messages. + assertResult(); + } + + /** + * Test versions of the includeRegex and excludeRegex functions with an indexed field. + * + * @param name + * the function name + */ + @ParameterizedTest + @ValueSource(strings = {"includeRegex", "excludeRegex"}) + void testFunctionWithSingleIndexedField(String name) throws Exception { + givenQuery("filter:" + name + "(INDEXED1,'value')"); + expectMessage("Indexed fields found within the function filter:" + name + ": INDEXED1"); + + assertResult(); + } + + /** + * Test a query with both the includeRegex and excludeRegex functions with indexed fields. + * + * @throws Exception + */ + @Test + void testMultipleFunctionWithIndexedField() throws Exception { + givenQuery("filter:includeRegex(INDEXED1,'value') && filter:excludeRegex(INDEXED2, 'value')"); + expectMessage("Indexed fields found within the function filter:includeRegex: INDEXED1"); + expectMessage("Indexed fields found within the function filter:excludeRegex: INDEXED2"); + + assertResult(); + } + + /** + * Test a query with both the includeRegex and excludeRegex functions without indexed fields. 
+ * + * @throws Exception + */ + @Test + void testMultipleFunctionWithoutIndexedField() throws Exception { + givenQuery("filter:includeRegex(FOO,'value') && filter:excludeRegex(BAR, 'value')"); + + // Do not expect any messages. + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToJexl(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new IncludeExcludeIndexFieldsRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/InvalidQuoteRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/InvalidQuoteRuleTest.java new file mode 100644 index 00000000000..5d5d53a44c8 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/InvalidQuoteRuleTest.java @@ -0,0 +1,115 @@ +package datawave.query.rules; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class InvalidQuoteRuleTest extends ShardQueryRuleTest { + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + /** + * Test a query that does not contain any phrases with invalid quotes. + */ + @Test + void testQueryWithoutInvalidQuotes() throws Exception { + givenQuery("FOO:'abc' OR FOO:'def'"); + // Do not expect to find any phrases. + assertResult(); + } + + /** + * Test a query that contains phrases with invalid quotes at both ends. + */ + @Test + void testQueryWithInvalidQuotesAtBothEndsOfPhrases() throws Exception { + givenQuery("FOO:`abc` OR FOO:`def` OR FOO:'efg'"); + expectMessage("Invalid quote ` used in phrase \"FOO:`abc`\". Use ' instead."); + expectMessage("Invalid quote ` used in phrase \"FOO:`def`\". Use ' instead."); + assertResult(); + } + + /** + * Test a query that contains a phrase with an invalid quote at the start. + */ + @Test + void testQueryWithInvalidQuotesAtStartOPhrase() throws Exception { + givenQuery("FOO:`abc' OR FOO:'efg'"); + expectMessage("Invalid quote ` used in phrase \"FOO:`abc'\". Use ' instead."); + assertResult(); + } + + /** + * Test a query that contains the invalid quote within the phrase, but not at either end. + */ + @Test + void testQueryWithEmptyInvalidQuotedInMiddle() throws Exception { + givenQuery("FOO:'ab`cd' OR FOO:'efg'"); + // Do not expect to find any phrases. + assertResult(); + } + + /** + * Test a query that contains a phrase with an invalid quote at the end. + */ + @Test + void testQueryWithInvalidQuotesAtEndOPhrase() throws Exception { + givenQuery("FOO:'abc` OR FOO:'efg'"); + expectMessage("Invalid quote ` used in phrase \"FOO:'abc`\". Use ' instead."); + assertResult(); + } + + /** + * Test a query that contains a phrase with an empty phrase with invalid quotes. + */ + @Test + void testQueryWithEmptyInvalidQuotedPhrase() throws Exception { + givenQuery("FOO:`` OR FOO:'efg'"); + expectMessage("Invalid quote ` used in phrase \"FOO:``\". Use ' instead."); + assertResult(); + } + + /** + * Test a query that contains a phrase that is just one invalid quote. + */ + @Test + void testPhraseThatConsistsOfSingleInvalidQuote() throws Exception { + givenQuery("FOO:` OR FOO:'efg'"); + expectMessage("Invalid quote ` used in phrase \"FOO:`\". Use ' instead."); + assertResult(); + } + + /** + * Test a query that contains a phrase with an invalid quote inside a function. + */ + @Test + void testFunctionWithInvalidQuote() throws Exception { + givenQuery("FOO:'abc' AND #INCLUDE(BAR,`def`)"); + expectMessage("Invalid quote ` used in phrase \"#INCLUDE(BAR, `def`)\". 
Use ' instead."); + assertResult(); + } + + /** + * Test unfielded terms with invalid quotes. + */ + @Test + void testTermWithInvalidQuote() throws Exception { + givenQuery("`def` `abc`"); + expectMessage("Invalid quote ` used in phrase \"`def`\". Use ' instead."); + expectMessage("Invalid quote ` used in phrase \"`abc`\". Use ' instead."); + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToLucene(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new InvalidQuoteRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/MinimumSlopProximityRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/MinimumSlopProximityRuleTest.java new file mode 100644 index 00000000000..0a8cb36d55b --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/MinimumSlopProximityRuleTest.java @@ -0,0 +1,178 @@ +package datawave.query.rules; + +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; + +public class MinimumSlopProximityRuleTest extends ShardQueryRuleTest { + + private static final SyntaxParser parser = new AccumuloSyntaxParser(); + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + /** + * Test a query without a slop value. + */ + @Test + public void testQueryWithNoSlop() throws Exception { + givenQuery("FIELD:'abc'"); + assertResult(); + } + + /** + * Test a query with a slop operator but no numeric value. + */ + @Test + public void testQueryWithSlopOperatorNoValue() throws Exception { + givenQuery("FIELD:\"term1 term2 term3\"~"); + assertResult(); + } + + /** + * Test a proximity query with a single term and insufficient slop value. + */ + @Test + public void testSingleTermProximityLessThanMin() throws Exception { + givenQuery("FIELD:\"term1\"~0"); + expectMessage("Invalid slop proximity, the " + 0 + " should be " + 1 + " or greater: FIELD:\"term1\"~" + 0); + assertResult(); + } + + /** + * Test a proximity query with multiple terms and insufficient slop value. + */ + @Test + public void testMultiTermProximityLessThanMin() throws Exception { + givenQuery("FIELD:\"term1 term2\"~1"); + expectMessage("Invalid slop proximity, the " + 1 + " should be " + 2 + " or greater: FIELD:\"term1 term2\"~" + 1); + assertResult(); + } + + /** + * Test a proximity query with a single term and slop value equal to the minimum allowed. + */ + @Test + public void testSingleTermProximityEqualToMin() throws Exception { + givenQuery("FIELD:\"term1\"~1"); + assertResult(); + } + + /** + * Test a proximity query with multiple terms and slop value equal to the minimum allowed. + */ + @Test + public void testMultiTermProximityEqualToMin() throws Exception { + givenQuery("FIELD:\"term1 term2\"~2"); + assertResult(); + } + + /** + * Test a proximity query with a single term and slop value greater than the minimum allowed. + */ + @Test + public void testSingleTermProximityGreaterThanMin() throws Exception { + givenQuery("FIELD:\"term1\"~2"); + assertResult(); + } + + /** + * Test a proximity query with multiple terms and slop value greater than the minimum allowed. 
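+ * For example, {@code FIELD:"term1 term2"~3} is valid since two terms only require a slop of at least 2.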
+ */ + @Test + public void testMultiTermProximityGreaterThanMin() throws Exception { + givenQuery("FIELD:\"term1 term2\"~3"); + assertResult(); + } + + /** + * Test a proximity query with padded white space on the left. + */ + @Test + public void testValidWithPaddedWhiteSpaceLeft() throws Exception { + givenQuery("FIELD:\" term1 term2 term3\"~3"); + assertResult(); + } + + /** + * Test a proximity query with padded white space on the right. + */ + @Test + public void testValidWithPaddedWhiteSpaceRight() throws Exception { + givenQuery("FIELD:\"term1 term2 term3 \"~3"); + assertResult(); + } + + /** + * Test a proximity query with padded white space between terms. + */ + @Test + public void testValidWithPaddedWhiteSpaceBetween() throws Exception { + givenQuery("FIELD:\"term1 term2 term3\"~3"); + assertResult(); + } + + /** + * Test a proximity query with padded white space on both left and right sides. + */ + @Test + public void testValidWithPaddedWhiteSpaceBothSides() throws Exception { + givenQuery("FIELD:\" term1 term2 term3 \"~3"); + assertResult(); + } + + /** + * Test an invalid proximity query with padded white space on the left and insufficient slop value. + */ + @Test + public void testInvalidWithPaddedWhiteSpaceLeft() throws Exception { + givenQuery("FIELD:\" term1 term2 term3\"~2"); + expectMessage("Invalid slop proximity, the " + 2 + " should be " + 3 + " or greater: FIELD:\" term1 term2 term3\"~" + 2); + assertResult(); + } + + /** + * Test an invalid proximity query with padded white space on the right and insufficient slop value. + */ + @Test + public void testInvalidWithPaddedWhiteSpaceRight() throws Exception { + givenQuery("FIELD:\"term1 term2 term3 \"~2"); + expectMessage("Invalid slop proximity, the " + 2 + " should be " + 3 + " or greater: FIELD:\"term1 term2 term3 \"~" + 2); + assertResult(); + } + + /** + * Test an invalid proximity query with padded white space between terms and insufficient slop value. + */ + @Test + public void testInvalidWithPaddedWhiteSpaceBetween() throws Exception { + givenQuery("FIELD:\"term1 term2 term3\"~2"); + expectMessage("Invalid slop proximity, the " + 2 + " should be " + 3 + " or greater: FIELD:\"term1 term2 term3\"~" + 2); + assertResult(); + } + + /** + * Test an invalid proximity query with padded white space on both sides and insufficient slop value. 
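+ * Leading and trailing whitespace should not count as additional terms, so three terms still require a slop of at least 3.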
+ */ + @Test + public void testInvalidWithPaddedWhiteSpaceBothSides() throws Exception { + givenQuery("FIELD:\" term1 term2 term3 \"~2"); + expectMessage("Invalid slop proximity, the " + 2 + " should be " + 3 + " or greater: FIELD:\" term1 term2 term3 \"~" + 2); + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToLucene(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new MinimumSlopProximityRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/NumericValueRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/NumericValueRuleTest.java new file mode 100644 index 00000000000..bd6d4bbff66 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/NumericValueRuleTest.java @@ -0,0 +1,101 @@ +package datawave.query.rules; + +import java.util.Set; + +import org.easymock.EasyMock; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import datawave.data.type.LcNoDiacriticsType; +import datawave.data.type.NumberType; +import datawave.query.util.MockMetadataHelper; +import datawave.query.util.TypeMetadata; + +class NumericValueRuleTest extends ShardQueryRuleTest { + + private static final Set NUMBER_TYPE = Set.of(NumberType.class.getName()); + private static Set LC_NO_DIACRITICS_TYPE = Set.of(LcNoDiacriticsType.class.getName()); + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + @Test + void testQueryWithoutNumericValues() throws Exception { + givenQuery("FOO == 'abc' && BAR != 'abc' || HAT > 'abc' || BAT < 'abc' || HEN <= 'abc' || VEE >= 'abc'"); + + // Do not expect any messages. + assertResult(); + } + + @Test + void testQueryWithNumericValuesForNumericFields() throws Exception { + givenQuery("FOO == 1 && BAR != 1 || HAT > 1 || BAT < 1 || HEN <= 1 || VEE >= 1"); + + // Set up a mock TypeMetadata that will return field type information. + TypeMetadata typeMetadata = EasyMock.mock(TypeMetadata.class); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("FOO")).andReturn(NUMBER_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("BAR")).andReturn(NUMBER_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("HAT")).andReturn(NUMBER_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("BAT")).andReturn(NUMBER_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("HEN")).andReturn(NUMBER_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("VEE")).andReturn(NUMBER_TYPE); + EasyMock.replay(typeMetadata); + givenTypeMetadata(typeMetadata); + + // Do not expect any messages. + assertResult(); + } + + @Test + void testQueryWithNumericValuesForNonNumericFields() throws Exception { + givenQuery("FOO == 1 && BAR != 1 || HAT > 1 || BAT < 1 || HEN <= 1 || VEE >= 1"); + + // Set up a mock TypeMetadata that will return field type information. 
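+ // Every field is mapped to LcNoDiacriticsType, so all six comparisons should be flagged as numeric values on non-numeric fields.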
+ TypeMetadata typeMetadata = EasyMock.mock(TypeMetadata.class); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("FOO")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("BAR")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("HAT")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("BAT")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("HEN")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("VEE")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.replay(typeMetadata); + givenTypeMetadata(typeMetadata); + + expectMessage("Numeric values supplied for non-numeric field(s): FOO, BAR, HAT, BAT, HEN, VEE"); + + assertResult(); + } + + @Test + void testQueryWithNumericValuesForMixedTypedFields() throws Exception { + givenQuery("FOO == 1 && BAR != 1 || HAT > 1 || BAT < 1 || HEN <= 1 || VEE >= 1"); + + // Set up a mock TypeMetadata that will return field type information. + TypeMetadata typeMetadata = EasyMock.mock(TypeMetadata.class); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("FOO")).andReturn(NUMBER_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("BAR")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("HAT")).andReturn(NUMBER_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("BAT")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("HEN")).andReturn(NUMBER_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("VEE")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.replay(typeMetadata); + givenTypeMetadata(typeMetadata); + + expectMessage("Numeric values supplied for non-numeric field(s): BAR, BAT, VEE"); + + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToJexl(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new NumericValueRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/ShardQueryRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/ShardQueryRuleTest.java new file mode 100644 index 00000000000..a1d02d4ad3c --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/ShardQueryRuleTest.java @@ -0,0 +1,120 @@ +package datawave.query.rules; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.jexl3.parser.ASTJexlScript; +import org.apache.commons.jexl3.parser.ParseException; +import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; +import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; +import org.junit.jupiter.api.AfterEach; + +import datawave.core.query.configuration.GenericQueryConfiguration; +import datawave.microservice.query.Query; +import datawave.query.jexl.JexlASTHelper; +import datawave.query.language.parser.lucene.AccumuloSyntaxParser; +import datawave.query.util.MetadataHelper; +import datawave.query.util.TypeMetadata; +import datawave.test.QueryRuleResultAssert; + +public abstract class ShardQueryRuleTest { + + protected static final String RULE_NAME = "RuleUnderTest"; + protected static final AccumuloSyntaxParser luceneParser = new AccumuloSyntaxParser(); + + protected String ruleName; + protected String query; + protected MetadataHelper metadataHelper; + protected TypeMetadata typeMetadata; + protected 
Query querySettings; + protected GenericQueryConfiguration queryConfiguration; + + protected String expectedRuleName; + protected Exception expectedException; + protected final List expectedMessages = new ArrayList<>(); + + @AfterEach + void tearDown() { + this.ruleName = null; + this.query = null; + this.metadataHelper = null; + this.typeMetadata = null; + this.querySettings = null; + this.queryConfiguration = null; + this.expectedRuleName = null; + this.expectedException = null; + this.expectedMessages.clear(); + } + + protected void givenRuleName(String ruleName) { + this.ruleName = ruleName; + } + + protected void givenQuery(String query) { + this.query = query; + } + + protected void givenMetadataHelper(MetadataHelper metadataHelper) { + this.metadataHelper = metadataHelper; + } + + protected void givenTypeMetadata(TypeMetadata typeMetadata) { + this.typeMetadata = typeMetadata; + } + + protected void givenQuerySettings(Query querySettings) { + this.querySettings = querySettings; + } + + protected void givenQueryConfiguration(GenericQueryConfiguration queryConfiguration) { + this.queryConfiguration = queryConfiguration; + } + + protected void expectRuleName(String ruleName) { + this.expectedRuleName = ruleName; + } + + protected void expectException(Exception exception) { + this.expectedException = exception; + } + + protected void expectMessage(String message) { + this.expectedMessages.add(message); + } + + protected abstract Object parseQuery() throws Exception; + + protected ASTJexlScript parseQueryToJexl() throws ParseException { + return JexlASTHelper.parseJexlQuery(query); + } + + protected QueryNode parseQueryToLucene() throws QueryNodeParseException { + return luceneParser.parse(query, ""); + } + + protected abstract ShardQueryRule getNewRule(); + + protected ShardQueryValidationConfiguration getValidationConfiguration() throws Exception { + ShardQueryValidationConfiguration configuration = new ShardQueryValidationConfiguration(); + configuration.setParsedQuery(parseQuery()); + configuration.setMetadataHelper(metadataHelper); + configuration.setTypeMetadata(typeMetadata); + configuration.setQuerySettings(querySettings); + configuration.setQueryConfiguration(queryConfiguration); + return configuration; + } + + protected void assertResult() throws Exception { + ShardQueryRule rule = getNewRule(); + rule.setName(ruleName); + + ShardQueryValidationConfiguration validationConfiguration = getValidationConfiguration(); + QueryRuleResult result = rule.validate(validationConfiguration); + // @formatter:off + QueryRuleResultAssert.assertThat(result) + .hasRuleName(expectedRuleName) + .hasException(expectedException) + .hasExactMessagesInAnyOrder(expectedMessages); + // @formatter:on + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/TimeFunctionRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/TimeFunctionRuleTest.java new file mode 100644 index 00000000000..d27e3616103 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/TimeFunctionRuleTest.java @@ -0,0 +1,126 @@ +package datawave.query.rules; + +import java.util.Set; + +import org.easymock.EasyMock; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import datawave.data.type.DateType; +import datawave.data.type.LcNoDiacriticsType; +import datawave.query.util.MockMetadataHelper; +import datawave.query.util.TypeMetadata; + +class TimeFunctionRuleTest extends ShardQueryRuleTest { + + private static Set DATE_TYPE = 
Set.of(DateType.class.getName());
+ private static Set LC_NO_DIACRITICS_TYPE = Set.of(LcNoDiacriticsType.class.getName());
+
+ @BeforeEach
+ void setUp() {
+ givenRuleName(RULE_NAME);
+ expectRuleName(RULE_NAME);
+ }
+
+ /**
+ * Test a query that has no time functions.
+ */
+ @Test
+ void testQueryWithoutTimeFunctions() throws Exception {
+ givenQuery("FOO == 'abc'");
+
+ // Do not expect any messages.
+ assertResult();
+ }
+
+ /**
+ * Test a query with a time function that references date type fields.
+ */
+ @Test
+ void testTimeFunctionWithDateTypeField() throws Exception {
+ givenQuery("filter:timeFunction(DATE1,DATE2,'-','>',2522880000000L)");
+ givenMetadataHelper(new MockMetadataHelper());
+
+ // Set up a mock TypeMetadata that will return the date type for the fields in the time function.
+ TypeMetadata typeMetadata = EasyMock.mock(TypeMetadata.class);
+ EasyMock.expect(typeMetadata.getNormalizerNamesForField("DATE1")).andReturn(DATE_TYPE);
+ EasyMock.expect(typeMetadata.getNormalizerNamesForField("DATE2")).andReturn(DATE_TYPE);
+ EasyMock.replay(typeMetadata);
+ givenTypeMetadata(typeMetadata);
+
+ // Do not expect any messages.
+ assertResult();
+ }
+
+ /**
+ * Test a query with a time function that references one date type field, and one non-date type field.
+ */
+ @Test
+ void testTimeFunctionWithOneNonDateTypeField() throws Exception {
+ givenQuery("filter:timeFunction(DATE1,NON_DATE2,'-','>',2522880000000L)");
+ givenMetadataHelper(new MockMetadataHelper());
+
+ // Set up a mock TypeMetadata that will return a non-date type for one of the fields in the time function.
+ TypeMetadata typeMetadata = EasyMock.mock(TypeMetadata.class);
+ EasyMock.expect(typeMetadata.getNormalizerNamesForField("DATE1")).andReturn(DATE_TYPE);
+ EasyMock.expect(typeMetadata.getNormalizerNamesForField("NON_DATE2")).andReturn(LC_NO_DIACRITICS_TYPE);
+ EasyMock.replay(typeMetadata);
+ givenTypeMetadata(typeMetadata);
+
+ expectMessage("Function #TIME_FUNCTION (filter:timeFunction) found with fields that are not date types: NON_DATE2");
+
+ assertResult();
+ }
+
+ /**
+ * Test a query with a time function that references only non-date type fields.
+ */
+ @Test
+ void testTimeFunctionWithBothNonDateTypeField() throws Exception {
+ givenQuery("filter:timeFunction(NON_DATE1,NON_DATE2,'-','>',2522880000000L)");
+ givenMetadataHelper(new MockMetadataHelper());
+
+ // Set up a mock TypeMetadata that will return non-date types for the fields in the time function.
+ TypeMetadata typeMetadata = EasyMock.mock(TypeMetadata.class);
+ EasyMock.expect(typeMetadata.getNormalizerNamesForField("NON_DATE1")).andReturn(LC_NO_DIACRITICS_TYPE);
+ EasyMock.expect(typeMetadata.getNormalizerNamesForField("NON_DATE2")).andReturn(LC_NO_DIACRITICS_TYPE);
+ EasyMock.replay(typeMetadata);
+ givenTypeMetadata(typeMetadata);
+
+ expectMessage("Function #TIME_FUNCTION (filter:timeFunction) found with fields that are not date types: NON_DATE1, NON_DATE2");
+
+ assertResult();
+ }
+
+ /**
+ * Test a query with multiple time functions.
+ */
+ @Test
+ void testMultipleTimeFunctions() throws Exception {
+ givenQuery("filter:timeFunction(DATE1,DATE2,'-','>',2522880000000L) && filter:timeFunction(NON_DATE1,DATE2,'-','>',2522880000000L) && filter:timeFunction(NON_DATE2,NON_DATE3,'-','>',2522880000000L)");
+ givenMetadataHelper(new MockMetadataHelper());
+
+ // Set up a mock TypeMetadata that will return non-date types for the fields in the time function.
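+ // DATE2 is referenced by the first two functions, hence the times(2) expectation below.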
+ TypeMetadata typeMetadata = EasyMock.mock(TypeMetadata.class); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("DATE1")).andReturn(DATE_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("DATE2")).andReturn(DATE_TYPE).times(2); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("NON_DATE1")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("NON_DATE2")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.expect(typeMetadata.getNormalizerNamesForField("NON_DATE3")).andReturn(LC_NO_DIACRITICS_TYPE); + EasyMock.replay(typeMetadata); + givenTypeMetadata(typeMetadata); + + expectMessage("Function #TIME_FUNCTION (filter:timeFunction) found with fields that are not date types: NON_DATE1, NON_DATE2, NON_DATE3"); + + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToJexl(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new TimeFunctionRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/UnescapedSpecialCharsRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/UnescapedSpecialCharsRuleTest.java new file mode 100644 index 00000000000..6a86452b8f7 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/UnescapedSpecialCharsRuleTest.java @@ -0,0 +1,263 @@ +package datawave.query.rules; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import datawave.query.jexl.JexlASTHelper; + +public class UnescapedSpecialCharsRuleTest extends ShardQueryRuleTest { + + private final Set literalExceptions = new HashSet<>(); + private boolean escapedWhitespaceRequiredForLiterals; + private final Set patternExceptions = new HashSet<>(); + private boolean escapedWhitespaceRequiredForPatterns; + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + @AfterEach + public void tearDown() { + super.tearDown(); + literalExceptions.clear(); + patternExceptions.clear(); + escapedWhitespaceRequiredForLiterals = false; + escapedWhitespaceRequiredForPatterns = false; + } + + /** + * Test a literal and pattern that contain alphanumeric characters only. + */ + @Test + public void testNoSpecialCharacters() throws Exception { + String alphanumericStr = ofChars('a', 'z') + ofChars('A', 'Z') + ofChars('0', '9'); + givenQuery("FOO == '" + alphanumericStr + "' || FOO =~ '" + alphanumericStr + "'"); + + assertResult(); + } + + private String ofChars(char start, char end) { + return IntStream.rangeClosed(start, end).mapToObj(c -> "" + ((char) c)).collect(Collectors.joining()); + } + + /** + * Test a literal and pattern with whitespace in them and do not allow unescaped whitespace. + */ + @Test + public void testUnescapedWhitespace() throws Exception { + givenEscapedWhitespaceRequiredForLiterals(true); + givenEscapedWhitespaceRequiredForPatterns(true); + givenQuery("FOO == 'ab c' || FOO =~ 'ab cd'"); + + expectMessage("Literal string \"ab c\" has the following unescaped special character(s): ' '"); + expectMessage("Regex pattern \"ab cd\" has the following unescaped special character(s): ' '"); + assertResult(); + } + + /** + * Test a literal and pattern with whitespace in them and allow unescaped whitespace. 
+ */ + @Test + public void testUnescapedWhitespaceAllowed() throws Exception { + givenEscapedWhitespaceRequiredForLiterals(false); + givenEscapedWhitespaceRequiredForPatterns(false); + givenQuery("FOO == 'ab c' || FOO =~ 'ab cd'"); + + // Do not expect any unescaped chars. + + assertResult(); + } + + /** + * Test literals and patterns with unescaped special chars at the start of the string. + */ + @Test + public void testUnescapedSpecialCharAtStart() throws Exception { + givenQuery("FOO == '&abc' || FOO =~ '&abc'"); + expectMessage("Literal string \"&abc\" has the following unescaped special character(s): '&'"); + expectMessage("Regex pattern \"&abc\" has the following unescaped special character(s): '&'"); + assertResult(); + } + + /** + * Test literals and patterns with unescaped special chars in the middle of the string. + */ + @Test + public void testUnescapedSpecialCharInMiddle() throws Exception { + givenQuery("FOO == 'a&bc' || FOO =~ 'a&bc'"); + expectMessage("Literal string \"a&bc\" has the following unescaped special character(s): '&'"); + expectMessage("Regex pattern \"a&bc\" has the following unescaped special character(s): '&'"); + assertResult(); + } + + /** + * Test literals and patterns with unescaped special chars at the end of the string. + */ + @Test + public void testUnescapedSpecialCharAtEnd() throws Exception { + givenQuery("FOO == 'abc&' || FOO =~ 'abc&'"); + expectMessage("Literal string \"abc&\" has the following unescaped special character(s): '&'"); + expectMessage("Regex pattern \"abc&\" has the following unescaped special character(s): '&'"); + assertResult(); + } + + /** + * Test a literal and pattern with a special character that is allowed to be escaped. + */ + @Test + public void testSpecialCharThatIsException() throws Exception { + givenLiteralExceptions('&'); + givenPatternExceptions('&'); + givenQuery("FOO == 'ab&c' || FOO =~ 'ab&d'"); + + // Do not expect any unescaped chars. + + assertResult(); + } + + /** + * Test a literal and pattern with a special character that is not an exception and is escaped. + */ + @Test + public void testEscapedSpecialChar() throws Exception { + givenQuery("FOO == 'ab\\&c' || FOO =~ 'ab\\&d'"); + + // Do not expect any unescaped chars. + assertResult(); + } + + /** + * Test that when we see a double backslash, it does not escape any special characters directly after it. + */ + @Test + public void testDoubleBackslashDoesNotEscapeCharacter() throws Exception { + // Backslashes must be doubly escaped in literals, but not patterns when parsed to JEXL. + givenQuery("FOO == 'ab\\\\\\\\&c' || FOO =~ 'ab\\\\&d'"); + expectMessage("Literal string \"ab\\\\&c\" has the following unescaped special character(s): '&'"); + expectMessage("Regex pattern \"ab\\\\&d\" has the following unescaped special character(s): '&'"); + assertResult(); + } + + /** + * Test that when we see a triple backslash, the last backslash escapes a special characters directly after it. + */ + @Test + public void testTripleBackslashEscapesCharacter() throws Exception { + // Backslashes must be doubly escaped in literals, but not patterns when parsed to JEXL. + givenQuery("FOO == 'ab\\\\\\\\\\\\&c' || FOO =~ 'ab\\\\\\&d'"); + + // Do not expect any unescaped chars. + assertResult(); + } + + /** + * Test that an unescaped backlash in a literal will be noted, but not an unescaped backslash in a pattern since it is a regex-reserved char. 
+ */ + @Test + public void testUnescapedBackslashInLiteral() throws Exception { + // Backslashes must be doubly escaped in literals, but not patterns when parsed to JEXL. + givenQuery("FOO == '\\\\' || FOO =~ '\\\\'"); + expectMessage("Literal string \"\\\" has the following unescaped special character(s): '\\'"); + assertResult(); + } + + /** + * Test that regex-reserved characters do not get flagged as unescaped special characters in patterns. + */ + @Test + public void testRegexReservedCharacters() throws Exception { + // This is not a valid pattern, but patterns are not compiled in the visitor, so an exception will not be thrown. + givenQuery("FOO =~ '.+*?^$()[]{}|\\\\'"); + + // Do not expect any unescaped characters. + assertResult(); + } + + /** + * Test empty strings will not result in flagged special characters. + */ + @Test + public void testEmptyStrings() throws Exception { + givenQuery("FOO == '' || FOO =~ ''"); + + // Do not expect any unescaped chars. + assertResult(); + } + + /** + * Test that regex patterns inside of ER, NR, and function nodes are evaluated. + */ + @Test + public void testPossiblePatternLocations() throws Exception { + givenQuery("FOO =~ 'er&' && FOO !~ 'nr&' && filter:includeRegex(FOO, 'function&')"); + expectMessage("Regex pattern \"er&\" has the following unescaped special character(s): '&'"); + expectMessage("Regex pattern \"nr&\" has the following unescaped special character(s): '&'"); + expectMessage("Regex pattern \"function&\" has the following unescaped special character(s): '&'"); + assertResult(); + } + + /** + * Test that literal strings for EQ, NE, LT, GT, LE, and GE nodes are evaluated. + */ + @Test + public void testPossibleLiteralLocations() throws Exception { + givenQuery("FOO == 'eq&' || FOO != 'ne&' || FOO < 'lt&' || FOO > 'gt&' || FOO <= 'le&' || FOO >= 'ge&'"); + expectMessage("Literal string \"eq&\" has the following unescaped special character(s): '&'"); + expectMessage("Literal string \"ne&\" has the following unescaped special character(s): '&'"); + expectMessage("Literal string \"lt&\" has the following unescaped special character(s): '&'"); + expectMessage("Literal string \"gt&\" has the following unescaped special character(s): '&'"); + expectMessage("Literal string \"le&\" has the following unescaped special character(s): '&'"); + expectMessage("Literal string \"ge&\" has the following unescaped special character(s): '&'"); + assertResult(); + } + + @Test + public void testMultipleSpecialCharactersFound() throws Exception { + givenQuery("FOO == 'ab^123%34#' || FOO =~ '343&kje:jd@'"); + expectMessage("Literal string \"ab^123%34#\" has the following unescaped special character(s): '^', '%', '#'"); + expectMessage("Regex pattern \"343&kje:jd@\" has the following unescaped special character(s): '&', ':', '@'"); + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToJexl(); + } + + @Override + protected ShardQueryRule getNewRule() { + UnescapedSpecialCharsRule rule = new UnescapedSpecialCharsRule(ruleName); + rule.setLiteralExceptions(literalExceptions); + rule.setEscapedWhitespaceRequiredForLiterals(escapedWhitespaceRequiredForLiterals); + rule.setPatternExceptions(patternExceptions); + rule.setEscapedWhitespaceRequiredForPatterns(escapedWhitespaceRequiredForPatterns); + return rule; + } + + private void givenLiteralExceptions(Character... 
chars) { + this.literalExceptions.addAll(List.of(chars)); + } + + private void givenEscapedWhitespaceRequiredForLiterals(boolean bool) { + this.escapedWhitespaceRequiredForLiterals = bool; + } + + private void givenPatternExceptions(Character... chars) { + this.patternExceptions.addAll(List.of(chars)); + } + + private void givenEscapedWhitespaceRequiredForPatterns(boolean bool) { + this.escapedWhitespaceRequiredForPatterns = bool; + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/UnescapedWildcardsInPhrasesRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/UnescapedWildcardsInPhrasesRuleTest.java new file mode 100644 index 00000000000..e0103882d8f --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/UnescapedWildcardsInPhrasesRuleTest.java @@ -0,0 +1,66 @@ +package datawave.query.rules; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class UnescapedWildcardsInPhrasesRuleTest extends ShardQueryRuleTest { + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + /** + * Test a query with a quoted phrase without wildcards. + */ + @Test + void testQuotedPhraseWithoutWildcards() throws Exception { + givenQuery("FOO:\"abc\""); + // Do not expect any phrases. + assertResult(); + } + + /** + * Test a query with a quoted phrase containing an escaped wildcard. + */ + @Test + void testQuotedPhraseWithEscapedWildcard() throws Exception { + // Backslash must be escaped here for it to remain in parsed query. + givenQuery("FOO:\"a\\\\*bc\""); + // Do not expect any phrases. + assertResult(); + } + + /** + * Test a query with quoted phrases with a non-escaped wildcard at the beginning, in the middle, and at the end of the phrase. + */ + @Test + void testQuotedPhraseWithUnescapedWildcard() throws Exception { + givenQuery("FOO:\"*abc\" OR FOO:\"de*f\" OR FOO:\"efg*\""); + expectMessage("Unescaped wildcard found in phrase FOO:\"*abc\". Wildcard is incorrect, or phrase should be FOO:/*abc/"); + expectMessage("Unescaped wildcard found in phrase FOO:\"de*f\". Wildcard is incorrect, or phrase should be FOO:/de*f/"); + expectMessage("Unescaped wildcard found in phrase FOO:\"efg*\". Wildcard is incorrect, or phrase should be FOO:/efg*/"); + assertResult(); + } + + /** + * Test a query with an unfielded quoted phrase with a non-escaped wildcard. + */ + @Test + void testUnfieldedQuotedPhraseWithUnescapedWildcard() throws Exception { + givenQuery("\"*abc\""); + expectMessage("Unescaped wildcard found in phrase \"*abc\". Wildcard is incorrect, or phrase should be /*abc/"); + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToLucene(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new UnescapedWildcardsInPhrasesRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/rules/UnfieldedTermsRuleTest.java b/warehouse/query-core/src/test/java/datawave/query/rules/UnfieldedTermsRuleTest.java new file mode 100644 index 00000000000..377dbfdc178 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/rules/UnfieldedTermsRuleTest.java @@ -0,0 +1,66 @@ +package datawave.query.rules; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class UnfieldedTermsRuleTest extends ShardQueryRuleTest { + + @BeforeEach + void setUp() { + givenRuleName(RULE_NAME); + expectRuleName(RULE_NAME); + } + + /** + * Test a query without unfielded terms.
+ */ + @Test + void testQueryWithoutUnfieldedTerms() throws Exception { + givenQuery("FOO:123 OR BAR:654"); + // Do not expect any messages. + assertResult(); + } + + /** + * Test a query with unfielded terms. + */ + @Test + void testQueryWithUnfieldedTerms() throws Exception { + givenQuery("FOO:123 643 OR abc 'bef'"); + + expectMessage("Unfielded term 643 found."); + expectMessage("Unfielded term abc found."); + expectMessage("Unfielded term 'bef' found."); + + assertResult(); + } + + /** + * Test that grouped terms directly after a field are not flagged. + */ + @Test + void testGroupedFieldTerms() throws Exception { + givenQuery("643 OR FIELD:(123 OR 456)"); + + expectMessage("Unfielded term 643 found."); + + assertResult(); + } + + @Test + void testUnfieldedQuotedPhrases() throws Exception { + givenQuery("FOO:123 or \"abc\""); + + expectMessage("Unfielded term \"abc\" found."); + + assertResult(); + } + + @Override + protected Object parseQuery() throws Exception { + return parseQueryToLucene(); + } + + @Override + protected ShardQueryRule getNewRule() { + return new UnfieldedTermsRule(ruleName); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/query/tables/ShardQueryLogicQueryValidationTest.java b/warehouse/query-core/src/test/java/datawave/query/tables/ShardQueryLogicQueryValidationTest.java new file mode 100644 index 00000000000..69b34300b51 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/tables/ShardQueryLogicQueryValidationTest.java @@ -0,0 +1,399 @@ +package datawave.query.tables; + +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; +import java.util.UUID; + +import javax.inject.Inject; + +import org.apache.accumulo.core.client.AccumuloClient; +import org.apache.accumulo.core.security.Authorizations; +import org.apache.log4j.Logger; +import org.assertj.core.api.Assertions; +import org.easymock.EasyMock; +import org.jboss.arquillian.container.test.api.Deployment; +import org.jboss.arquillian.junit.Arquillian; +import org.jboss.shrinkwrap.api.ShrinkWrap; +import org.jboss.shrinkwrap.api.asset.StringAsset; +import org.jboss.shrinkwrap.api.spec.JavaArchive; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; + +import datawave.configuration.spring.SpringBean; +import datawave.helpers.PrintUtility; +import datawave.ingest.data.TypeRegistry; +import datawave.microservice.query.Query; +import datawave.microservice.query.QueryImpl; +import datawave.query.Constants; +import datawave.query.QueryParameters; +import datawave.query.QueryTestTableHelper; +import datawave.query.RebuildingScannerTestHelper; +import datawave.query.function.deserializer.KryoDocumentDeserializer; +import datawave.query.rules.AmbiguousNotRule; +import datawave.query.rules.AmbiguousOrPhrasesRule; +import datawave.query.rules.AmbiguousUnquotedPhrasesRule; +import datawave.query.rules.FieldExistenceRule; +import datawave.query.rules.FieldPatternPresenceRule; +import datawave.query.rules.IncludeExcludeArgsRule; +import datawave.query.rules.IncludeExcludeIndexFieldsRule; +import datawave.query.rules.InvalidQuoteRule; +import datawave.query.rules.MinimumSlopProximityRule; +import datawave.query.rules.NumericValueRule; +import
datawave.query.rules.QueryRule; +import datawave.query.rules.QueryRuleResult; +import datawave.query.rules.QueryValidationResult; +import datawave.query.rules.TimeFunctionRule; +import datawave.query.rules.UnescapedSpecialCharsRule; +import datawave.query.rules.UnescapedWildcardsInPhrasesRule; +import datawave.query.rules.UnfieldedTermsRule; +import datawave.query.tables.edge.DefaultEdgeEventQueryLogic; +import datawave.query.util.WiseGuysIngest; +import datawave.util.TableName; +import datawave.webservice.edgedictionary.RemoteEdgeDictionary; +import datawave.webservice.query.exception.QueryException; + +@RunWith(Arquillian.class) +public class ShardQueryLogicQueryValidationTest { + + private static final Logger log = Logger.getLogger(ShardQueryLogicQueryValidationTest.class); + + private static final Authorizations auths = new Authorizations("ALL"); + private static final Set authSet = Collections.singleton(auths); + + private final DateFormat dateFormat = new SimpleDateFormat("yyyyMMdd"); + private final Map queryParameters = new HashMap<>(); + + private String query; + private Date startDate; + private Date endDate; + private boolean expandFields; + private boolean expandValues; + + private final List expectedRuleResults = new ArrayList<>(); + private Class expectedExceptionType; + private String expectedExceptionMessage; + + @Inject + @SpringBean(name = "EventQuery") + protected ShardQueryLogic logic; + protected KryoDocumentDeserializer deserializer; + + @Deployment + public static JavaArchive createDeployment() throws Exception { + return ShrinkWrap.create(JavaArchive.class) + .addPackages(true, "org.apache.deltaspike", "io.astefanutti.metrics.cdi", "datawave.query", "org.jboss.logging", + "datawave.webservice.query.result.event") + .deleteClass(DefaultEdgeEventQueryLogic.class).deleteClass(RemoteEdgeDictionary.class) + .deleteClass(datawave.query.metrics.QueryMetricQueryLogic.class) + .addAsManifestResource(new StringAsset( + "" + "datawave.query.tables.edge.MockAlternative" + ""), + "beans.xml"); + } + + @BeforeClass + public static void beforeClass() throws Exception { + TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + } + + @AfterClass + public static void afterClass() throws Exception { + TypeRegistry.reset(); + } + + @Before + public void setup() throws ParseException { + this.logic.setFullTableScanEnabled(true); + this.deserializer = new KryoDocumentDeserializer(); + this.startDate = dateFormat.parse("20091231"); + this.endDate = dateFormat.parse("20150101"); + this.expandFields = true; + this.expandValues = false; + } + + @After + public void tearDown() throws Exception { + this.logic = null; + this.query = null; + this.queryParameters.clear(); + this.startDate = null; + this.endDate = null; + this.expectedRuleResults.clear(); + this.expectedExceptionType = null; + this.expectedExceptionMessage = null; + } + + private AccumuloClient createClient() throws Exception { + AccumuloClient client = new QueryTestTableHelper(ShardQueryLogicTest.ShardRange.class.toString(), log, + RebuildingScannerTestHelper.TEARDOWN.EVERY_OTHER_SANS_CONSISTENCY, RebuildingScannerTestHelper.INTERRUPT.EVERY_OTHER).client; + WiseGuysIngest.writeItAll(client, WiseGuysIngest.WhatKindaRange.SHARD); + PrintUtility.printTable(client, auths, TableName.SHARD); + PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); + PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); + return client; + } + + private Query createSettings() { + QueryImpl settings = new QueryImpl(); + 
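+ // These settings are handed to ShardQueryLogic.validateQuery() by assertResult().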
settings.setBeginDate(this.startDate); + settings.setEndDate(this.endDate); + settings.setPagesize(Integer.MAX_VALUE); + settings.setQueryAuthorizations(auths.serialize()); + settings.setQuery(this.query); + settings.setParameters(this.queryParameters); + settings.setId(UUID.randomUUID()); + return settings; + } + + /** + * Validate that query rules are instantiated properly when defined in QueryLogicFactory.xml. + */ + @Test + public void testBeanCreation() { + List expectedRules = new ArrayList<>(); + expectedRules.add(new InvalidQuoteRule("Check for Invalid Quote")); + expectedRules.add(new UnfieldedTermsRule("Check for Unfielded Terms")); + expectedRules.add(new UnescapedWildcardsInPhrasesRule("Check Quoted Phrases for Unescaped Wildcard")); + expectedRules.add(new AmbiguousNotRule("Check for Ambiguous Usage of NOT")); + expectedRules.add(new AmbiguousOrPhrasesRule("Check for Unfielded Terms That Could Be Wrapped")); + expectedRules.add(new AmbiguousUnquotedPhrasesRule("Check for Unfielded Terms That Could Be Quoted")); + expectedRules.add(new MinimumSlopProximityRule("Validate Slop Proximity")); + expectedRules.add(new IncludeExcludeArgsRule("Validate Args of #INCLUDE and #EXCLUDE")); + expectedRules.add(new FieldExistenceRule("Check Field Existence", Set.of("I_DO_NOT_EXIST", "_NOFIELD_", "_ANYFIELD_"))); + expectedRules.add(new UnescapedSpecialCharsRule("Check for Unescaped Special Characters", Set.of('?'), Set.of('_'), false, false)); + expectedRules.add(new FieldPatternPresenceRule("Check Presence of Field or Pattern", Map.of("_ANYFIELD_", "Detected presence of _ANYFIELD_"), + Map.of(".*", "Detected pattern '.*' that will match everything"))); + expectedRules.add(new IncludeExcludeIndexFieldsRule("Check #INCLUDE and #EXCLUDE for Indexed Fields")); + expectedRules.add(new NumericValueRule("Validate Numeric Values Only Given for Numeric Fields")); + expectedRules.add(new TimeFunctionRule("Validate #TIME_FUNCTION has Date Fields")); + + List actual = logic.getValidationRules(); + Assertions.assertThat(actual).containsExactlyElementsOf(expectedRules); + } + + /** + * Test that attempting to validate a query when no rules are configured results in an exception. + */ + @Test + public void testNoRulesConfigured() { + logic.setValidationRules(null); + + Assert.assertThrows("Query validation rules not configured.", IllegalStateException.class, () -> assertResult()); + } + + /** + * Test that when an unparseable LUCENE query is given, the exception is encapsulated within the validation result. + */ + @Test + public void testLuceneParseException() throws Exception { + givenQuery("FOO:ab:c"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.LUCENE); + + expectExceptionInResult(QueryException.class, "Failed to parse query as LUCENE"); + + assertResult(); + } + + /** + * Test that when an unparseable JEXL query is given, the exception is encapsulated within the validation result. + */ + @Test + public void testJexlParseException() throws Exception { + givenQuery("FOO == (ab"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.JEXL); + + expectExceptionInResult(QueryException.class, "Failed to parse query as JEXL"); + + assertResult(); + } + + /** + * Test that when an exception is thrown by a query rule validating a LUCENE query, the exception is encapsulated within the validation result.
+ */ + @Test + public void testExceptionThrownByLuceneQueryRule() throws Exception { + // Set up a mock rule that will throw an exception when trying to validate a LUCENE query. + QueryRule mockRule = EasyMock.createMock(QueryRule.class); + EasyMock.expect(mockRule.getName()).andReturn("Mock Rule").anyTimes(); + EasyMock.expect(mockRule.copy()).andReturn(mockRule).anyTimes(); + EasyMock.expect(mockRule.canValidate(EasyMock.anyObject())).andThrow(new IllegalArgumentException("I failed!")); + EasyMock.replay(mockRule); + logic.setValidationRules(List.of(mockRule)); + + givenQuery("FOO:abc"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.LUCENE); + + expectExceptionInResult(QueryException.class, "Error occurred when validating against rule Mock Rule"); + + assertResult(); + } + + /** + * Test that when an exception is thrown by a query rule validating a JEXL query, the exception is encapsulated within the validation result. + */ + @Test + public void testExceptionThrownByJexlQueryRule() throws Exception { + // Set up a mock rule that will throw an exception when trying to validate a JEXL query. + QueryRule mockRule = EasyMock.createMock(QueryRule.class); + EasyMock.expect(mockRule.getName()).andReturn("Mock Rule").anyTimes(); + EasyMock.expect(mockRule.copy()).andReturn(mockRule).anyTimes(); + EasyMock.expect(mockRule.canValidate(EasyMock.anyObject())).andThrow(new IllegalArgumentException("I failed!")); + EasyMock.replay(mockRule); + logic.setValidationRules(List.of(mockRule)); + + givenQuery("FOO == abc"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.JEXL); + + expectExceptionInResult(QueryException.class, "Error occurred when validating against rule Mock Rule"); + + assertResult(); + } + + /** + * Test a LUCENE query that will result in a message from an {@link InvalidQuoteRule}. + */ + @Test + public void testLuceneQueryFlaggedByLuceneRule() throws Exception { + givenQuery("FOO:`abc`"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.LUCENE); + + // Update the rules to just the one we want to test for simplicity. + logic.setValidationRules(List.of(new InvalidQuoteRule("Check for Invalid Quote"))); + + expectRuleResult(QueryRuleResult.of("Check for Invalid Quote", "Invalid quote ` used in phrase \"FOO:`abc`\". Use ' instead.")); + + assertResult(); + } + + /** + * Test a LUCENE query that will not result in a message from an {@link InvalidQuoteRule}. + */ + @Test + public void testLuceneQueryNotFlaggedByLuceneRule() throws Exception { + givenQuery("FOO:'abc'"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.LUCENE); + + // Update the rules to just the one we want to test for simplicity. + logic.setValidationRules(List.of(new InvalidQuoteRule("Check for Invalid Quote"))); + + // We should still get a query rule result with the rule name, just not with any message. + expectRuleResult(QueryRuleResult.of("Check for Invalid Quote")); + + assertResult(); + } + + /** + * Test a JEXL query that will result in a message from a {@link FieldPatternPresenceRule}. + * + * @throws Exception + */ + @Test + public void testJexlQueryFlaggedByJexlRule() throws Exception { + givenQuery("_ANYFIELD_ == '123'"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.JEXL); + + // Update the rules to just the one we want to test for simplicity.
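+ // The rule list configured in QueryLogicFactory.xml is replaced so that only the rule under test runs.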
+ logic.setValidationRules(List.of(new FieldPatternPresenceRule("Check Presence of Field or Pattern", + Map.of("_ANYFIELD_", "Detected presence of _ANYFIELD_"), Map.of(".*", "Detected pattern '.*' that will match everything")))); + + expectRuleResult(QueryRuleResult.of("Check Presence of Field or Pattern", "Detected presence of _ANYFIELD_")); + + assertResult(); + } + + /** + * Test a JEXL query that will not result in a message from a {@link FieldPatternPresenceRule}. + * + * @throws Exception + */ + @Test + public void testJexlQueryNotFlaggedByJexlRule() throws Exception { + givenQuery("FOO == '123'"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.JEXL); + + // Update the rules to just the one we want to test for simplicity. + logic.setValidationRules(List.of(new FieldPatternPresenceRule("Check Presence of Field or Pattern", + Map.of("_ANYFIELD_", "Detected presence of _ANYFIELD_"), Map.of(".*", "Detected pattern '.*' that will match everything")))); + + // We should still get a query rule result with the rule name, just not with any message. + expectRuleResult(QueryRuleResult.of("Check Presence of Field or Pattern")); + + assertResult(); + } + + /** + * Test a LUCENE query that should get flagged by both LUCENE and JEXL rules. + */ + @Test + public void testQueryFlaggedByMultipleRules() throws Exception { + givenQuery("FOO:`abc` AND BAR:abc def ghi OR _ANYFIELD_:123"); + givenQueryParameter(QueryParameters.QUERY_SYNTAX, Constants.LUCENE); + + // Update the rules to just the one we want to test for simplicity. + // @formatter:off + logic.setValidationRules(List.of( + new InvalidQuoteRule("Check For Invalid Quote"), + new AmbiguousUnquotedPhrasesRule("Check for Unfielded Terms That Could Be Quoted"), + new FieldPatternPresenceRule("Check Presence of Field or Pattern", + Map.of("_ANYFIELD_", "Detected presence of _ANYFIELD_"), Map.of(".*", "Detected pattern '.*' that will match everything")))); + // @formatter:on + + expectRuleResult(QueryRuleResult.of("Check For Invalid Quote", "Invalid quote ` used in phrase \"FOO:`abc`\". 
Use ' instead.")); + expectRuleResult(QueryRuleResult.of("Check for Unfielded Terms That Could Be Quoted", + "Ambiguous unfielded terms AND'd with fielded term detected: BAR:abc AND def AND ghi Recommended: BAR:\"abc def ghi\"")); + expectRuleResult(QueryRuleResult.of("Check Presence of Field or Pattern", "Detected presence of _ANYFIELD_")); + assertResult(); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void givenQueryParameter(String parameter, String value) { + this.queryParameters.put(parameter, value); + } + + private void givenExpandFields(boolean expandFields) { + this.expandFields = expandFields; + } + + private void givenExpandValues(boolean expandValues) { + this.expandValues = expandValues; + } + + private void expectRuleResult(QueryRuleResult ruleResult) { + this.expectedRuleResults.add(ruleResult); + } + + private void expectExceptionInResult(Class type, String message) { + this.expectedExceptionType = type; + this.expectedExceptionMessage = message; + } + + private void assertResult() throws Exception { + AccumuloClient client = createClient(); + Query settings = createSettings(); + QueryValidationResult actualResult = (QueryValidationResult) logic.validateQuery(client, settings, authSet, expandFields, expandValues); + + Assertions.assertThat(actualResult.getRuleResults()).isEqualTo(expectedRuleResults); + if (expectedExceptionType == null) { + Assertions.assertThat(actualResult.getException()).isNull(); + } else { + Assertions.assertThat(actualResult.getException()).hasMessage(expectedExceptionMessage).isInstanceOf(expectedExceptionType); + } + } + +} diff --git a/warehouse/query-core/src/test/java/datawave/query/tables/ShardQueryLogicTest.java b/warehouse/query-core/src/test/java/datawave/query/tables/ShardQueryLogicTest.java index ee244d7388e..473546cba29 100644 --- a/warehouse/query-core/src/test/java/datawave/query/tables/ShardQueryLogicTest.java +++ b/warehouse/query-core/src/test/java/datawave/query/tables/ShardQueryLogicTest.java @@ -1,6 +1,7 @@ package datawave.query.tables; import java.text.DateFormat; +import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; @@ -25,6 +26,7 @@ import org.jboss.shrinkwrap.api.ShrinkWrap; import org.jboss.shrinkwrap.api.asset.StringAsset; import org.jboss.shrinkwrap.api.spec.JavaArchive; +import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; @@ -39,7 +41,9 @@ import datawave.core.query.iterator.DatawaveTransformIterator; import datawave.helpers.PrintUtility; import datawave.ingest.data.TypeRegistry; +import datawave.microservice.query.Query; import datawave.microservice.query.QueryImpl; +import datawave.query.QueryParameters; import datawave.query.QueryTestTableHelper; import datawave.query.RebuildingScannerTestHelper; import datawave.query.function.deserializer.KryoDocumentDeserializer; @@ -53,84 +57,47 @@ import datawave.webservice.result.BaseQueryResponse; import datawave.webservice.result.DefaultEventQueryResponse; -/** - * generic ShardQueryLogic tests -testPrimaryToSecondaryFieldMapForQueryProfile - * - */ public abstract class ShardQueryLogicTest { private static final Logger log = Logger.getLogger(ShardQueryLogicTest.class); + private static final Authorizations auths = new Authorizations("ALL"); + private static final Set authSet = Collections.singleton(auths); + + @Inject + @SpringBean(name = "EventQuery") + protected ShardQueryLogic logic; + protected 
KryoDocumentDeserializer deserializer; + + private final DateFormat dateFormat = new SimpleDateFormat("yyyyMMdd"); + private final Map queryParameters = new HashMap<>(); + + private String query; + private Date startDate; + private Date endDate; + + protected abstract String getRange(); + @RunWith(Arquillian.class) public static class ShardRange extends ShardQueryLogicTest { - protected static AccumuloClient connector = null; - private static Authorizations auths = new Authorizations("ALL"); - - @BeforeClass - public static void setUp() throws Exception { - - // testing tear downs but without consistency, because when we tear it down then we loose the ongoing bloom filter and subsequently the rebuild will - // start returning - // different keys. - QueryTestTableHelper qtth = new QueryTestTableHelper(ShardRange.class.toString(), log, - RebuildingScannerTestHelper.TEARDOWN.EVERY_OTHER_SANS_CONSISTENCY, RebuildingScannerTestHelper.INTERRUPT.EVERY_OTHER); - connector = qtth.client; - - WiseGuysIngest.writeItAll(connector, WiseGuysIngest.WhatKindaRange.SHARD); - PrintUtility.printTable(connector, auths, TableName.SHARD); - PrintUtility.printTable(connector, auths, TableName.SHARD_INDEX); - PrintUtility.printTable(connector, auths, QueryTestTableHelper.MODEL_TABLE_NAME); - } @Override - protected void runTestQuery(Set> expected, String querystr, Date startDate, Date endDate, Map extraParms) throws Exception { - super.runTestQuery(expected, querystr, startDate, endDate, extraParms, connector); + protected String getRange() { + return WiseGuysIngest.WhatKindaRange.SHARD.name(); } } @RunWith(Arquillian.class) public static class DocumentRange extends ShardQueryLogicTest { - protected static AccumuloClient connector = null; - private static Authorizations auths = new Authorizations("ALL"); - - @BeforeClass - public static void setUp() throws Exception { - - // testing tear downs but without consistency, because when we tear it down then we loose the ongoing bloom filter and subsequently the rebuild will - // start returning - // different keys. 
- QueryTestTableHelper qtth = new QueryTestTableHelper(DocumentRange.class.toString(), log, - RebuildingScannerTestHelper.TEARDOWN.EVERY_OTHER_SANS_CONSISTENCY, RebuildingScannerTestHelper.INTERRUPT.EVERY_OTHER); - connector = qtth.client; - - WiseGuysIngest.writeItAll(connector, WiseGuysIngest.WhatKindaRange.DOCUMENT); - Authorizations auths = new Authorizations("ALL"); - PrintUtility.printTable(connector, auths, TableName.SHARD); - PrintUtility.printTable(connector, auths, TableName.SHARD_INDEX); - PrintUtility.printTable(connector, auths, QueryTestTableHelper.MODEL_TABLE_NAME); - } @Override - protected void runTestQuery(Set> expected, String querystr, Date startDate, Date endDate, Map extraParms) throws Exception { - super.runTestQuery(expected, querystr, startDate, endDate, extraParms, connector); + protected String getRange() { + return WiseGuysIngest.WhatKindaRange.DOCUMENT.name(); } } - protected Authorizations auths = new Authorizations("ALL"); - - protected Set authSet = Collections.singleton(auths); - - @Inject - @SpringBean(name = "EventQuery") - protected ShardQueryLogic logic; - - protected KryoDocumentDeserializer deserializer; - - private final DateFormat format = new SimpleDateFormat("yyyyMMdd"); - @Deployment public static JavaArchive createDeployment() throws Exception { - return ShrinkWrap.create(JavaArchive.class) .addPackages(true, "org.apache.deltaspike", "io.astefanutti.metrics.cdi", "datawave.query", "org.jboss.logging", "datawave.webservice.query.result.event") @@ -141,39 +108,62 @@ public static JavaArchive createDeployment() throws Exception { "beans.xml"); } + @BeforeClass + public static void beforeClass() throws Exception { + TimeZone.setDefault(TimeZone.getTimeZone("GMT")); + } + @AfterClass - public static void teardown() { + public static void afterClass() throws Exception { TypeRegistry.reset(); } @Before public void setup() { - TimeZone.setDefault(TimeZone.getTimeZone("GMT")); - - logic.setFullTableScanEnabled(true); - deserializer = new KryoDocumentDeserializer(); + this.logic.setFullTableScanEnabled(true); + this.deserializer = new KryoDocumentDeserializer(); } - protected abstract void runTestQuery(Set> expected, String querystr, Date startDate, Date endDate, Map extraParms) - throws Exception; + @After + public void tearDown() throws Exception { + this.logic = null; + this.query = null; + this.queryParameters.clear(); + this.startDate = null; + this.endDate = null; + } - protected void runTestQuery(Set> expected, String querystr, Date startDate, Date endDate, Map extraParms, - AccumuloClient connector) throws Exception { - log.debug("runTestQuery"); + private AccumuloClient createClient() throws Exception { + AccumuloClient client = new QueryTestTableHelper(ShardRange.class.toString(), log, RebuildingScannerTestHelper.TEARDOWN.EVERY_OTHER_SANS_CONSISTENCY, + RebuildingScannerTestHelper.INTERRUPT.EVERY_OTHER).client; + WiseGuysIngest.writeItAll(client, WiseGuysIngest.WhatKindaRange.valueOf(getRange())); + PrintUtility.printTable(client, auths, TableName.SHARD); + PrintUtility.printTable(client, auths, TableName.SHARD_INDEX); + PrintUtility.printTable(client, auths, QueryTestTableHelper.MODEL_TABLE_NAME); + return client; + } + private Query createSettings() { QueryImpl settings = new QueryImpl(); - settings.setBeginDate(startDate); - settings.setEndDate(endDate); + settings.setBeginDate(this.startDate); + settings.setEndDate(this.endDate); settings.setPagesize(Integer.MAX_VALUE); settings.setQueryAuthorizations(auths.serialize()); - 
settings.setQuery(querystr); - settings.setParameters(extraParms); + settings.setQuery(this.query); + settings.setParameters(this.queryParameters); settings.setId(UUID.randomUUID()); + return settings; + } + protected void runTestQuery(Set> expected) throws Exception { + log.debug("runTestQuery"); + + Query settings = createSettings(); log.debug("query: " + settings.getQuery()); log.debug("logic: " + settings.getQueryLogicName()); - GenericQueryConfiguration config = logic.initialize(connector, settings, authSet); + AccumuloClient client = createClient(); + GenericQueryConfiguration config = logic.initialize(client, settings, authSet); logic.setupQuery(config); DocumentTransformer transformer = (DocumentTransformer) (logic.getTransformer(settings)); @@ -219,197 +209,209 @@ protected void runTestQuery(Set> expected, String querystr, Date sta @Test public void testFieldMappingTransformViaProfile() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "false"); - extraParameters.put("query.profile", "copyFieldEventQuery"); - - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID =~ '^[CS].*'"; + givenQuery("UUID =~ '^[CS].*'"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "false"); + givenQueryParameter(QueryParameters.QUERY_PROFILE, "copyFieldEventQuery"); + givenStartDate("20091231"); + givenEndDate("20150101"); Set> expected = new HashSet<>(); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.sopranoUID, "MAGIC_COPY:18")); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.corleoneUID, "MAGIC_COPY:18")); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.caponeUID, "MAGIC_COPY:18")); - runTestQuery(expected, queryString, startDate, endDate, extraParameters); + runTestQuery(expected); } @Test public void testRegex() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND QUOTE=~'.*kind'"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND QUOTE=~'.*kind'"; Set> expected = new HashSet<>(); // todo: make this work someday // expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.caponeUID)); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); - + runTestQuery(expected); } @Test public void testFwdRegex() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND QUOTE=~'kin.*'"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND QUOTE=~'kin.*'"; Set> expected = new HashSet<>(); // todo: make this work someday // expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.caponeUID)); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); - + runTestQuery(expected); } @Test public void testEvalRegex() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND ((_Eval_ = true) && QUOTE=~'.*alone')"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - 
String queryString = "UUID=='CAPONE' AND ((_Eval_ = true) && QUOTE=~'.*alone')"; Set> expected = new HashSet<>(); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.caponeUID)); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); + runTestQuery(expected); } @Test public void testNegativeEvalRegex() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND ((_Eval_ = true) && QUOTE!~'.*alone')"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND ((_Eval_ = true) && QUOTE!~'.*alone')"; Set> expected = new HashSet<>(); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); - + runTestQuery(expected); } @Test public void testNegativeEvalRegexV2() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND ((_Eval_ = true) && !(QUOTE=~'.*alone'))"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND ((_Eval_ = true) && !(QUOTE=~'.*alone'))"; Set> expected = new HashSet<>(); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); - + runTestQuery(expected); } @Test public void testDoubeWildcard() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND QUOTE=~'.*ind.*'"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND QUOTE=~'.*ind.*'"; Set> expected = new HashSet<>(); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.caponeUID)); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); + runTestQuery(expected); } @Test public void testNegativeRegex() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND QUOTE!~'.*ind'"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND QUOTE!~'.*ind'"; Set> expected = new HashSet<>(); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.caponeUID)); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); + runTestQuery(expected); } @Test public void testNegativeRegexV2() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND !(QUOTE=~'.*ind')"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND !(QUOTE=~'.*ind')"; Set> expected = new HashSet<>(); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.caponeUID)); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); + runTestQuery(expected); } @Test public void testFilterRegex() throws Exception { - Map extraParameters 
= new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND filter:includeRegex(QUOTE,'.*kind word alone.*')"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND filter:includeRegex(QUOTE,'.*kind word alone.*')"; Set> expected = new HashSet<>(); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.caponeUID)); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); + runTestQuery(expected); } @Test public void testNegativeFilterRegex() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); + givenQuery("UUID=='CAPONE' AND !filter:includeRegex(QUOTE,'.*kind word alone.*')"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); - String queryString = "UUID=='CAPONE' AND !filter:includeRegex(QUOTE,'.*kind word alone.*')"; Set> expected = new HashSet<>(); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); + runTestQuery(expected); } @Test public void testNegativeFilterRegexV2() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("include.grouping.context", "true"); - - String queryString = "UUID=='CAPONE' AND !(filter:includeRegex(QUOTE,'.*kind word alone.*'))"; + givenQuery("UUID=='CAPONE' AND !(filter:includeRegex(QUOTE,'.*kind word alone.*'))"); + givenQueryParameter(QueryParameters.INCLUDE_GROUPING_CONTEXT, "true"); + givenStartDate("20091231"); + givenEndDate("20150101"); Set> expected = new HashSet<>(); - runTestQuery(expected, queryString, format.parse("20091231"), format.parse("20150101"), extraParameters); + runTestQuery(expected); } @Test public void testExcludeDataTypesBangDataType() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("datatype.filter.set", "!test2"); + givenQuery("UUID=='TATTAGLIA'"); + givenQueryParameter(QueryParameters.DATATYPE_FILTER_SET, "!test2"); + givenStartDate("20091231"); + givenEndDate("20150101"); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID=='TATTAGLIA'"; Set> expected = new HashSet<>(); // No results expected - - runTestQuery(expected, queryString, startDate, endDate, extraParameters); + runTestQuery(expected); } @Test public void testExcludeDataTypesNegateDataType() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("datatype.filter.set", "test2,!test2"); + givenQuery("UUID=='TATTAGLIA'"); + givenQueryParameter(QueryParameters.DATATYPE_FILTER_SET, "test2,!test2"); + givenStartDate("20091231"); + givenEndDate("20150101"); - Date startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID=='TATTAGLIA'"; Set> expected = new HashSet<>(); // Expect one result, since the negated data type results in empty set, which is treated by Datawave as all data types expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.tattagliaUID)); - runTestQuery(expected, queryString, startDate, endDate, extraParameters); + runTestQuery(expected); } @Test public void testExcludeDataTypesIncludeOneTypeExcludeOneType() throws Exception { - Map extraParameters = new HashMap<>(); - extraParameters.put("datatype.filter.set", "test2,!test"); - - Date
startDate = format.parse("20091231"); - Date endDate = format.parse("20150101"); - - String queryString = "UUID=='TATTAGLIA' || UUID=='CAPONE'"; + givenQuery("UUID=='TATTAGLIA' || UUID=='CAPONE'"); + givenQueryParameter(QueryParameters.DATATYPE_FILTER_SET, "test2,!test"); + givenStartDate("20091231"); + givenEndDate("20150101"); Set> expected = new HashSet<>(); expected.add(Sets.newHashSet("UID:" + WiseGuysIngest.tattagliaUID)); - runTestQuery(expected, queryString, startDate, endDate, extraParameters); + runTestQuery(expected); + } + + private void givenQuery(String query) { + this.query = query; + } + + private void givenQueryParameter(String parameter, String value) { + this.queryParameters.put(parameter, value); + } + + private void givenStartDate(String date) throws ParseException { + this.startDate = dateFormat.parse(date); + } + + private void givenEndDate(String date) throws ParseException { + this.endDate = dateFormat.parse(date); } } diff --git a/warehouse/query-core/src/test/java/datawave/query/transformer/QueryValidationResultTransformerTest.java b/warehouse/query-core/src/test/java/datawave/query/transformer/QueryValidationResultTransformerTest.java new file mode 100644 index 00000000000..7dcf8a9f453 --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/query/transformer/QueryValidationResultTransformerTest.java @@ -0,0 +1,173 @@ +package datawave.query.transformer; + +import java.util.ArrayList; +import java.util.List; + +import datawave.webservice.query.exception.QueryExceptionType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import datawave.query.rules.QueryRuleResult; +import datawave.query.rules.QueryValidationResult; +import datawave.webservice.result.QueryValidationResponse; + +class QueryValidationResultTransformerTest { + + private final QueryValidationResultTransformer transformer = new QueryValidationResultTransformer(); + + private QueryValidationResult result; + + private QueryValidationResponse expectedResponse; + + @AfterEach + void tearDown() { + result = null; + expectedResponse = null; + } + + /** + * Test an empty query validation result. + */ + @Test + void testEmptyResult() { + givenResult(new QueryValidationResult()); + + QueryValidationResponse response = new QueryValidationResponse(); + response.setExecutedRules(List.of()); + expectResponse(response); + + assertResult(); + } + + /** + * Test a validation result that only has an exception. + */ + @Test + void testResultWithExceptionOnly() { + QueryValidationResult result = new QueryValidationResult(); + IllegalArgumentException exception = new IllegalArgumentException("I failed!"); + result.setException(exception); + givenResult(result); + + QueryValidationResponse response = new QueryValidationResponse(); + response.addException(exception); + response.setExecutedRules(List.of()); + expectResponse(response); + + assertResult(); + } + + /** + * Test a validation result with query results that should be included. 
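+ * Rule results that contain no messages and no exception should appear only in the executed rules list of the response.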
+ */ + @Test + void testResultWithRuleResultsOnly() { + IllegalArgumentException exception = new IllegalArgumentException("I failed!"); + + QueryValidationResult result = new QueryValidationResult(); + result.addRuleResult(QueryRuleResult.of("", "I am unnamed")); + result.addRuleResult(QueryRuleResult.of("Rule 1", "I have a message", "I have two messages")); + result.addRuleResult(QueryRuleResult.of("Rule 2 with no message")); + result.addRuleResult(QueryRuleResult.of("Rule 3 with exception", exception)); + givenResult(result); + + List responseList = new ArrayList<>(); + responseList.add(new QueryValidationResponse.Result("UNNAMED_RULE", List.of("I am unnamed"), null)); + responseList.add(new QueryValidationResponse.Result("Rule 1", List.of("I have a message", "I have two messages"), null)); + responseList.add(new QueryValidationResponse.Result("Rule 3 with exception", List.of(), new QueryExceptionType("I failed!", null, null))); + QueryValidationResponse response = new QueryValidationResponse(); + response.setResults(responseList); + response.setExecutedRules(List.of("UNNAMED_RULE", "Rule 1", "Rule 2 with no message", "Rule 3 with exception")); + expectResponse(response); + + assertResult(); + } + + /** + * Test a validation result with query results and an exception. + */ + @Test + void testResultWithRuleResultsAndException() { + IllegalArgumentException exception = new IllegalArgumentException("I failed!"); + IllegalArgumentException secondException = new IllegalArgumentException("I failed big time."); + + QueryValidationResult result = new QueryValidationResult(); + result.addRuleResult(QueryRuleResult.of("", "I am unnamed")); + result.addRuleResult(QueryRuleResult.of("Rule 1", "I have a message", "I have two messages")); + result.addRuleResult(QueryRuleResult.of("Rule 2 with no message")); + result.addRuleResult(QueryRuleResult.of("Rule 3 with exception", exception)); + result.setException(secondException); + givenResult(result); + + List responseList = new ArrayList<>(); + responseList.add(new QueryValidationResponse.Result("UNNAMED_RULE", List.of("I am unnamed"), null)); + responseList.add(new QueryValidationResponse.Result("Rule 1", List.of("I have a message", "I have two messages"), null)); + responseList.add(new QueryValidationResponse.Result("Rule 3 with exception", List.of(), new QueryExceptionType("I failed!", null, null))); + QueryValidationResponse response = new QueryValidationResponse(); + response.setResults(responseList); + response.setExecutedRules(List.of("UNNAMED_RULE", "Rule 1", "Rule 2 with no message", "Rule 3 with exception")); + response.addException(secondException); + expectResponse(response); + + assertResult(); + } + + /** + * Test a validation result that had a query rule result with an exception with a cause. 
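+ * The message of the cause is expected to be carried in the resulting {@link QueryExceptionType} alongside the exception's own message.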
+ */ + @Test + void testResultWithRuleResultWithExceptionWithCause() { + IllegalArgumentException cause = new IllegalArgumentException("I failed!"); + IllegalArgumentException exception = new IllegalArgumentException("I failed big time.", cause); + + QueryValidationResult result = new QueryValidationResult(); + result.addRuleResult(QueryRuleResult.of("Rule Name", exception)); + result.setException(exception); + givenResult(result); + + List responseList = new ArrayList<>(); + responseList.add(new QueryValidationResponse.Result("Rule Name", List.of(), new QueryExceptionType("I failed big time.", "I failed!", null))); + QueryValidationResponse response = new QueryValidationResponse(); + response.setResults(responseList); + response.setExecutedRules(List.of("Rule Name")); + response.addException(exception); + expectResponse(response); + + assertResult(); + } + + /** + * Test a validation result with no relevant query results that should be included. + */ + @Test + void testResultWithNoRelevantRuleResults() { + IllegalArgumentException exception = new IllegalArgumentException("I failed!"); + + QueryValidationResult result = new QueryValidationResult(); + result.addRuleResult(QueryRuleResult.of("")); + result.addRuleResult(QueryRuleResult.of("Rule 1")); + result.addRuleResult(QueryRuleResult.of("Rule 2")); + result.addRuleResult(QueryRuleResult.of("Rule 3")); + givenResult(result); + + QueryValidationResponse response = new QueryValidationResponse(); + response.setExecutedRules(List.of("UNNAMED_RULE", "Rule 1", "Rule 2", "Rule 3")); + expectResponse(response); + + assertResult(); + } + + private void givenResult(QueryValidationResult result) { + this.result = result; + } + + private void expectResponse(QueryValidationResponse response) { + this.expectedResponse = response; + } + + private void assertResult() { + QueryValidationResponse actual = transformer.transform(result); + Assertions.assertEquals(expectedResponse, actual); + } +} diff --git a/warehouse/query-core/src/test/java/datawave/test/QueryRuleResultAssert.java b/warehouse/query-core/src/test/java/datawave/test/QueryRuleResultAssert.java new file mode 100644 index 00000000000..68c3b1ed42c --- /dev/null +++ b/warehouse/query-core/src/test/java/datawave/test/QueryRuleResultAssert.java @@ -0,0 +1,106 @@ +package datawave.test; + +import java.util.List; +import java.util.Objects; + +import org.assertj.core.api.AbstractAssert; +import org.assertj.core.api.Assertions; + +import datawave.query.rules.QueryRuleResult; + +/** + * This class provides the ability to perform a number of assertions specific to {@link QueryRuleResult} instances, and is intended to be used for testing + * purposes. + */ +public class QueryRuleResultAssert extends AbstractAssert { + + /** + * Return a new {@link QueryRuleResultAssert} that will perform assertions on the specified result. + * + * @param result + * the result + * @return a new {@link QueryRuleResultAssert} for the result + */ + public static QueryRuleResultAssert assertThat(QueryRuleResult result) { + return new QueryRuleResultAssert(result); + } + + public QueryRuleResultAssert(QueryRuleResult result) { + super(result, QueryRuleResultAssert.class); + } + + /** + * Verifies that the actual result's rule name is equal to the given one. 
+ * + * @param ruleName + * the rule name + * @return this {@link QueryRuleResultAssert} + */ + public QueryRuleResultAssert hasRuleName(String ruleName) { + isNotNull(); + if (!Objects.equals(actual.getRuleName(), ruleName)) { + failWithMessage("Expected ruleName to be %s but was %s", ruleName, actual.getRuleName()); + } + return this; + } + + /** + * Verifies that the actual result's exception is null. + * + * @return this {@link QueryRuleResultAssert} + */ + public QueryRuleResultAssert hasNullException() { + isNotNull(); + if (actual.getException() != null) { + failWithMessage("Expected exception to be null, but was %s", actual.getException()); + } + return this; + } + + /** + * Verifies that the actual result's exception is equal to the given one. + * + * @param exception + * the exception + * @return this {@link QueryRuleResultAssert} + */ + public QueryRuleResultAssert hasException(Exception exception) { + isNotNull(); + if (!Objects.equals(actual.getException(), exception)) { + failWithMessage("Expected exception to be %s but was %s", exception, actual.getException()); + } + return this; + } + + /** + * Verifies that the actual result's messages are equal to the given list. + * + * @param messages + * the messages + * @return this {@link QueryRuleResultAssert} + */ + public QueryRuleResultAssert hasMessages(List messages) { + isNotNull(); + if (!Objects.equals(actual.getMessages(), messages)) { + failWithMessage("Expected messages to be %n %s%n but was %n %s%n", messages, actual.getMessages()); + } + return this; + } + + /** + * Verifies that the actual result's messages contain exactly the elements of the given list in any order. + * + * @param messages + * the messages + * @return this {@link QueryRuleResultAssert} + */ + public QueryRuleResultAssert hasExactMessagesInAnyOrder(List messages) { + isNotNull(); + // @formatter:off + Assertions.assertThat(actual.getMessages()) + .describedAs("Check messages contains exactly in any order") + .containsExactlyInAnyOrderElementsOf(messages); + // @formatter:on + return this; + } +} diff --git a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml index 7c043448b9e..cfedd05de7a 100644 --- a/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml +++ b/warehouse/query-core/src/test/resources/datawave/query/QueryLogicFactory.xml @@ -158,7 +158,92 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + I_DO_NOT_EXIST + _ANYFIELD_ + _NOFIELD_ + + + + + + + + + + + + + + + + + + + + + + + ?
+ + + + + _ + + + + + + @@ -285,6 +370,24 @@ + + + + + + + + + + + + + + + + + + @@ -388,4 +491,5 @@ + diff --git a/web-services/client/src/main/java/datawave/webservice/result/QueryValidationResponse.java b/web-services/client/src/main/java/datawave/webservice/result/QueryValidationResponse.java new file mode 100644 index 00000000000..f895aa56f9b --- /dev/null +++ b/web-services/client/src/main/java/datawave/webservice/result/QueryValidationResponse.java @@ -0,0 +1,161 @@ +package datawave.webservice.result; + +import datawave.webservice.query.exception.ExceptionMessages; +import datawave.webservice.query.exception.QueryExceptionType; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.StringJoiner; + +import javax.xml.bind.annotation.XmlAccessOrder; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorOrder; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlElementWrapper; +import javax.xml.bind.annotation.XmlRootElement; + +@XmlRootElement(name = "QueryValidationResponse") +@XmlAccessorType(XmlAccessType.NONE) +@XmlAccessorOrder(XmlAccessOrder.ALPHABETICAL) +public class QueryValidationResponse extends BaseResponse { + + private static final long serialVersionUID = 1L; + + @XmlElement(name = "LogicName") + private String logicName; + + @XmlElement(name = "QueryId") + private String queryId; + + @XmlElementWrapper(name = "Results") + @XmlElement(name = "Result") + private List results; + + @XmlElementWrapper(name = "ExecutedRules") + @XmlElement(name = "RuleName") + private List executedRules; + + public String getLogicName() { + return logicName; + } + + public void setLogicName(String logicName) { + this.logicName = logicName; + } + + public String getQueryId() { + return queryId; + } + + public void setQueryId(String queryId) { + this.queryId = queryId; + } + + public List getResults() { + return results; + } + + public void setResults(List results) { + this.results = results; + } + + public List getExecutedRules() { + return executedRules; + } + + public void setExecutedRules(List executedRules) { + this.executedRules = executedRules; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + QueryValidationResponse response = (QueryValidationResponse) object; + return Objects.equals(logicName, response.logicName) && Objects.equals(queryId, response.queryId) && Objects.equals(results, response.results) + && Objects.equals(executedRules, response.executedRules); + } + + @Override + public int hashCode() { + return Objects.hash(logicName, queryId, results, executedRules); + } + + @Override + public String toString() { + return new StringJoiner(", ", QueryValidationResponse.class.getSimpleName() + "[", "]").add("logicName='" + logicName + "'") + .add("queryId='" + queryId + "'").add("results=" + results).add("executedRules=" + executedRules).toString(); + } + + public static class Result { + + @XmlElement(name = "RuleName") + private String ruleName; + + @XmlElement(name = "Messages") + private List messages; + + @XmlElement(name = "Exception") + private QueryExceptionType exception; + + public Result(String ruleName, List messages, QueryExceptionType exception) { + this.ruleName = ruleName; + this.messages = messages; + this.exception = exception; + } + + public 
String getRuleName() { + return ruleName; + } + + public void setRuleName(String ruleName) { + this.ruleName = ruleName; + } + + public List getMessages() { + return messages; + } + + public void setMessages(List messages) { + this.messages = messages; + } + + public QueryExceptionType getException() { + return exception; + } + + public void setException(QueryExceptionType exception) { + this.exception = exception; + } + + @Override + public boolean equals(Object object) { + if (this == object) { + return true; + } + if (object == null || getClass() != object.getClass()) { + return false; + } + Result result = (Result) object; + return Objects.equals(ruleName, result.ruleName) && Objects.equals(messages, result.messages) && Objects.equals(exception, result.exception); + } + + @Override + public int hashCode() { + return Objects.hash(ruleName, messages, exception); + } + + @Override + public String toString() { + return new StringJoiner(", ", Result.class.getSimpleName() + "[", "]").add("ruleName='" + ruleName + "'").add("messages=" + messages) + .add("exception=" + exception).toString(); + } + } +} diff --git a/web-services/query/src/main/java/datawave/webservice/query/runner/QueryExecutorBean.java b/web-services/query/src/main/java/datawave/webservice/query/runner/QueryExecutorBean.java index ddb81dc9a7d..c8fd38f18ea 100644 --- a/web-services/query/src/main/java/datawave/webservice/query/runner/QueryExecutorBean.java +++ b/web-services/query/src/main/java/datawave/webservice/query/runner/QueryExecutorBean.java @@ -72,6 +72,7 @@ import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.util.Pair; +import org.apache.commons.collections4.Transformer; import org.apache.commons.jexl3.parser.TokenMgrException; import org.apache.deltaspike.core.api.exclude.Exclude; import org.apache.log4j.Logger; @@ -166,6 +167,7 @@ import datawave.webservice.result.GenericResponse; import datawave.webservice.result.QueryImplListResponse; import datawave.webservice.result.QueryLogicResponse; +import datawave.webservice.result.QueryValidationResponse; import datawave.webservice.result.VoidResponse; import io.protostuff.LinkedBuffer; import io.protostuff.Message; @@ -3010,11 +3012,121 @@ private void updateQueryParams(Query q, String queryLogicName, String query, Dat @Path("/{logicName}/validate") @Interceptors({RequiredInterceptor.class, ResponseInterceptor.class}) @Timed(name = "dw.query.validateQuery", absolute = true) - public GenericResponse validateQuery(@Required("logicName") @PathParam("logicName") String queryLogicName, + public QueryValidationResponse validateQuery(@Required("logicName") @PathParam("logicName") String queryLogicName, MultivaluedMap queryParameters) { - GenericResponse response = new GenericResponse<>(); - response.setMessages(Collections.singletonList("Query validator coming soon.")); - throw new DatawaveWebApplicationException(new UnsupportedOperationException("Query validator not implemented"), response, 501); + QueryData queryData = validateQuery(queryLogicName, queryParameters, null); + + QueryValidationResponse response = new QueryValidationResponse(); + + Query query = null; + AccumuloClient client = null; + AccumuloConnectionFactory.Priority priority; + RunningQuery runningQuery = null; + + try { + // by default we will expand the fields but not the values. 
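+ // The EXPAND_FIELDS and EXPAND_VALUES query parameters, when supplied, override these defaults.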
@@ -3010,11 +3012,121 @@ private void updateQueryParams(Query q, String queryLogicName, String query, Dat
     @Path("/{logicName}/validate")
     @Interceptors({RequiredInterceptor.class, ResponseInterceptor.class})
     @Timed(name = "dw.query.validateQuery", absolute = true)
-    public GenericResponse<String> validateQuery(@Required("logicName") @PathParam("logicName") String queryLogicName,
+    public QueryValidationResponse validateQuery(@Required("logicName") @PathParam("logicName") String queryLogicName,
                     MultivaluedMap<String,String> queryParameters) {
-        GenericResponse<String> response = new GenericResponse<>();
-        response.setMessages(Collections.singletonList("Query validator coming soon."));
-        throw new DatawaveWebApplicationException(new UnsupportedOperationException("Query validator not implemented"), response, 501);
+        QueryData queryData = validateQuery(queryLogicName, queryParameters, null);
+
+        QueryValidationResponse response = new QueryValidationResponse();
+
+        Query query = null;
+        AccumuloClient client = null;
+        AccumuloConnectionFactory.Priority priority;
+        RunningQuery runningQuery = null;
+
+        try {
+            // By default we will expand the fields but not the values.
+            boolean expandFields = true;
+            boolean expandValues = false;
+            if (queryParameters.containsKey(EXPAND_FIELDS)) {
+                expandFields = Boolean.valueOf(queryParameters.getFirst(EXPAND_FIELDS));
+            }
+            if (queryParameters.containsKey(EXPAND_VALUES)) {
+                expandValues = Boolean.valueOf(queryParameters.getFirst(EXPAND_VALUES));
+            }
+
+            AuditType auditType = queryData.logic.getAuditType();
+            try {
+                // The query should be transient.
+                qp.setPersistenceMode(QueryPersistence.TRANSIENT);
+                Map<String,List<String>> optionalQueryParameters = qp.getUnknownParameters(MapUtils.toMultivaluedMap(queryParameters));
+                query = persister.create(queryData.userDn, queryData.dnList, marking, queryLogicName, qp, MapUtils.toMultivaluedMap(optionalQueryParameters));
+                auditType = queryData.logic.getAuditType();
+            } finally {
+                queryParameters.add(PrivateAuditConstants.AUDIT_TYPE, auditType.name());
+
+                if (!auditType.equals(AuditType.NONE)) {
+                    // Audit the query before it's executed.
+                    try {
+                        try {
+                            List<String> selectors = queryData.logic.getSelectors(query);
+                            if (selectors != null && !selectors.isEmpty()) {
+                                queryParameters.put(PrivateAuditConstants.SELECTORS, selectors);
+                            }
+                        } catch (Exception e) {
+                            log.error("Error accessing query selector", e);
+                        }
+                        // If the user didn't set an audit id, use the query id.
+                        if (!queryParameters.containsKey(AuditParameters.AUDIT_ID) && query != null) {
+                            queryParameters.putSingle(AuditParameters.AUDIT_ID, query.getId().toString());
+                        }
+                        auditor.audit(MapUtils.toMultiValueMap(queryParameters));
+                    } catch (IllegalArgumentException e) {
+                        log.error("Error validating audit parameters", e);
+                        BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.MISSING_REQUIRED_PARAMETER, e);
+                        response = new QueryValidationResponse();
+                        response.addException(qe);
+                        throw new BadRequestException(qe, response);
+                    } catch (Exception e) {
+                        log.error("Error auditing query", e);
+                        QueryException qe = new QueryException(DatawaveErrorCode.QUERY_AUDITING_ERROR, e);
+                        response = new QueryValidationResponse();
+                        response.addException(qe);
+                        throw qe;
+                    }
+                }
+            }
+
+            priority = queryData.logic.getConnectionPriority();
+            Map<String,String> trackingMap = connectionFactory.getTrackingMap(Thread.currentThread().getStackTrace());
+            query.populateTrackingMap(trackingMap);
+            accumuloConnectionRequestBean.requestBegin(query.getId().toString(), queryData.userDn, trackingMap);
+            try {
+                client = connectionFactory.getClient(queryData.userDn, queryData.proxyServers, queryData.logic.getConnPoolName(), priority, trackingMap);
+            } finally {
+                accumuloConnectionRequestBean.requestEnd(query.getId().toString());
+            }
+
+            // The query principal is our local principal unless the query logic has its own user operations.
+            if (qp.getAuths() != null) {
+                queryData.logic.preInitialize(query,
+                                WSAuthorizationsUtil.buildAuthorizations(Collections.singleton(WSAuthorizationsUtil.splitAuths(qp.getAuths()))));
+            } else {
+                queryData.logic.preInitialize(query, WSAuthorizationsUtil.buildAuthorizations(null));
+            }
+            DatawavePrincipal queryPrincipal = (DatawavePrincipal) ((queryData.logic.getUserOperations() == null) ? queryData.p
+                            : queryData.logic.getUserOperations().getRemoteUser((DatawavePrincipal) queryData.p));
+            // The overall principal (the one with combined auths across remote user operations) comes from our own user operations bean.
+            DatawavePrincipal overallPrincipal = (DatawavePrincipal) userOperationsBean.getRemoteUser((DatawavePrincipal) queryData.p);
+            Set<Authorizations> calculatedAuths = WSAuthorizationsUtil.getDowngradedAuthorizations(qp.getAuths(), overallPrincipal, queryPrincipal);
+
+            // Validate the query.
+            Object validationResult = queryData.logic.validateQuery(client, query, calculatedAuths, expandFields, expandValues);
+            // Convert the validation results to a response.
+            Transformer<Object,QueryValidationResponse> responseTransformer = queryData.logic.getQueryValidationResponseTransformer();
+            response = responseTransformer.transform(validationResult);
+            response.setQueryId(query.getId().toString());
+            response.setLogicName(queryLogicName);
+        } catch (Throwable throwable) {
+            // Close the logic on exception.
+            try {
+                if (null != queryData.logic) {
+                    queryData.logic.close();
+                }
+            } catch (Exception e) {
+                log.error("Exception occurred while closing query logic; may be innocuous if scanners were running.", e);
+            }
+
+            // Depersist the query.
+            try {
+                if (null != query) {
+                    persister.remove(query);
+                }
+            } catch (Exception e) {
+                response.addException(new QueryException(DatawaveErrorCode.DEPERSIST_ERROR, e).getBottomQueryException());
+            }
+        }
+
+        return response;
     }
 
     /**