diff --git a/server/src/main/java/org/opensearch/index/query/QueryStringQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/QueryStringQueryBuilder.java
index 4ee790291f453..f72e069230d36 100644
--- a/server/src/main/java/org/opensearch/index/query/QueryStringQueryBuilder.java
+++ b/server/src/main/java/org/opensearch/index/query/QueryStringQueryBuilder.java
@@ -32,6 +32,7 @@
 
 package org.opensearch.index.query;
 
+import java.util.Set;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.Query;
@@ -161,6 +162,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQu
 
+    private Set<String> discoveredFields;
+
     /** To limit effort spent determinizing regexp queries. */
     private int maxDeterminizedStates = DEFAULT_DETERMINIZE_WORK_LIMIT;
@@ -872,13 +875,7 @@ protected int doHashCode() {
         );
     }
 
-    @Override
-    protected Query doToQuery(QueryShardContext context) throws IOException {
-        String rewrittenQueryString = escape ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString) : queryString;
-        if (fieldsAndWeights.size() > 0 && this.defaultField != null) {
-            throw addValidationError("cannot use [fields] parameter in conjunction with [default_field]", null);
-        }
-
+    private QueryStringQueryParser newQueryParser(QueryShardContext context) {
         QueryStringQueryParser queryParser;
         boolean isLenient = lenient == null ? context.queryStringLenient() : lenient;
         if (defaultField != null) {
@@ -946,6 +943,39 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
         queryParser.setDeterminizeWorkLimit(maxDeterminizedStates);
         queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
         queryParser.setFuzzyTranspositions(fuzzyTranspositions);
+        return queryParser;
+    }
+
+    public Set<String> extractAllUsedFields(QueryShardContext context) {
+        String rewrittenQueryString = escape ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString) : queryString;
+        if (fieldsAndWeights.size() > 0 && this.defaultField != null) {
+            throw addValidationError("cannot use [fields] parameter in conjunction with [default_field]", null);
+        }
+
+        QueryStringQueryParser queryParser = newQueryParser(context);
+
+        Query query;
+        try {
+            query = queryParser.parse(rewrittenQueryString);
+        } catch (org.apache.lucene.queryparser.classic.ParseException e) {
+            throw new QueryShardException(context, "Failed to parse query [" + this.queryString + "]", e);
+        }
+
+        if (query == null) {
+            return null;
+        }
+
+        return queryParser.getDiscoveredQueryFields();
+    }
+
+    @Override
+    protected Query doToQuery(QueryShardContext context) throws IOException {
+        String rewrittenQueryString = escape ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString) : queryString;
+        if (fieldsAndWeights.size() > 0 && this.defaultField != null) {
+            throw addValidationError("cannot use [fields] parameter in conjunction with [default_field]", null);
+        }
+
+        QueryStringQueryParser queryParser = newQueryParser(context);
 
         Query query;
         try {
diff --git a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java
index 9a121fe55a7e7..ea314bd3c2ed9 100644
--- a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java
+++ b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java
@@ -32,6 +32,8 @@
 
 package org.opensearch.index.search;
 
+import java.util.HashSet;
+import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -113,6 +115,8 @@ public class QueryStringQueryParser extends XQueryParser {
     private MultiTermQuery.RewriteMethod fuzzyRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE;
     private boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions;
 
+    private Set<String> discoveredQueryFields = new HashSet<>();
+
     /**
      * @param context The query shard context.
      * @param defaultField The default field for query terms.
@@ -327,6 +331,8 @@ public Query getFieldQuery(String field, String queryText, boolean quoted) throw
             return getFieldQuery(field, queryText, getPhraseSlop());
         }
 
+        onDiscoveredField(field);
+
         // Detects additional operators '<', '<=', '>', '>=' to handle range query with one side unbounded.
         // It is required to use a prefix field operator to enable the detection since they are not treated
         // as logical operator by the query parser (e.g. age:>=10).
@@ -361,6 +367,9 @@ public Query getFieldQuery(String field, String queryText, boolean quoted) throw
             // if there is no match in the mappings.
             return newUnmappedFieldQuery(field);
         }
+        for (Map.Entry<String, Float> entry : fields.entrySet()) {
+            onDiscoveredField(entry.getKey());
+        }
         Analyzer oldAnalyzer = queryBuilder.analyzer;
         try {
             if (forceAnalyzer != null) {
@@ -374,6 +383,13 @@ public Query getFieldQuery(String field, String queryText, boolean quoted) throw
         }
     }
 
+    private void onDiscoveredField(String field) {
+        if (field == null || Regex.isSimpleMatchPattern(field)) {
+            return;
+        }
+        discoveredQueryFields.add(field);
+    }
+
     @Override
     protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException {
         if (field != null && EXISTS_FIELD.equals(field)) {
@@ -384,6 +400,9 @@ protected Query getFieldQuery(String field, String queryText, int slop) throws P
         if (fields.isEmpty()) {
             return newUnmappedFieldQuery(field);
         }
+        for (Map.Entry<String, Float> entry : fields.entrySet()) {
+            onDiscoveredField(entry.getKey());
+        }
         Analyzer oldAnalyzer = queryBuilder.analyzer;
         int oldSlop = queryBuilder.phraseSlop;
         try {
@@ -423,6 +442,7 @@ protected Query getRangeQuery(String field, String part1, String part2, boolean
         List<Query> queries = new ArrayList<>();
         for (Map.Entry<String, Float> entry : fields.entrySet()) {
+            onDiscoveredField(entry.getKey());
             Query q = getRangeQuerySingle(entry.getKey(), part1, part2, startInclusive, endInclusive, context);
             assert q != null;
             queries.add(applyBoost(q, entry.getValue()));
@@ -448,6 +468,7 @@ private Query getRangeQuerySingle(
             return newUnmappedFieldQuery(field);
         }
         try {
+            onDiscoveredField(currentFieldType.name());
             Analyzer normalizer = forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer;
             BytesRef part1Binary = part1 == null ? null : normalizer.normalize(field, part1);
             BytesRef part2Binary = part2 == null ? null : normalizer.normalize(field, part2);
@@ -472,6 +493,7 @@ private Query getRangeQuerySingle(
 
     @Override
     protected Query handleBareFuzzy(String field, Token fuzzySlop, String termImage) throws ParseException {
+        onDiscoveredField(field);
         if (fuzzySlop.image.length() == 1) {
             return getFuzzyQuery(field, termImage, fuzziness.asDistance(termImage));
         }
@@ -487,6 +509,7 @@ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
         }
         List<Query> queries = new ArrayList<>();
         for (Map.Entry<String, Float> entry : fields.entrySet()) {
+            onDiscoveredField(entry.getKey());
             Query q = getFuzzyQuerySingle(entry.getKey(), termStr, minSimilarity);
             assert q != null;
             queries.add(applyBoost(q, entry.getValue()));
@@ -506,6 +529,7 @@ private Query getFuzzyQuerySingle(String field, String termStr, float minSimilar
             return newUnmappedFieldQuery(field);
         }
         try {
+            onDiscoveredField(field);
             Analyzer normalizer = forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer;
             BytesRef term = termStr == null ? null : normalizer.normalize(field, termStr);
             return currentFieldType.fuzzyQuery(
@@ -526,6 +550,7 @@ private Query getFuzzyQuerySingle(String field, String termStr, float minSimilar
 
     @Override
     protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
+        onDiscoveredField(term.field());
         int numEdits = Fuzziness.build(minimumSimilarity).asDistance(term.text());
         if (fuzzyRewriteMethod != null) {
             return new FuzzyQuery(term, numEdits, prefixLength, fuzzyMaxExpansions, fuzzyTranspositions, fuzzyRewriteMethod);
@@ -542,6 +567,7 @@ protected Query getPrefixQuery(String field, String termStr) throws ParseExcepti
         }
         List<Query> queries = new ArrayList<>();
         for (Map.Entry<String, Float> entry : fields.entrySet()) {
+            onDiscoveredField(entry.getKey());
             Query q = getPrefixQuerySingle(entry.getKey(), termStr);
             if (q != null) {
                 queries.add(applyBoost(q, entry.getValue()));
@@ -564,6 +590,7 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc
         if (currentFieldType == null || currentFieldType.getTextSearchInfo() == TextSearchInfo.NONE) {
             return newUnmappedFieldQuery(field);
         }
+        onDiscoveredField(field);
         setAnalyzer(getSearchAnalyzer(currentFieldType));
         Query query = null;
         if (currentFieldType.getTextSearchInfo().isTokenized() == false) {
@@ -598,6 +625,7 @@ private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr, Mappe
             source = getAnalyzer().tokenStream(field, termStr);
             source.reset();
         } catch (IOException e) {
+            onDiscoveredField(field);
             return super.getPrefixQuery(field, termStr);
         }
         tlist = new ArrayList<>();
@@ -642,20 +670,24 @@ private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr, Mappe
             Query posQuery;
             if (plist.size() == 1) {
                 if (isLastPos) {
+                    onDiscoveredField(currentFieldType.name());
                     posQuery = currentFieldType.prefixQuery(plist.get(0), getMultiTermRewriteMethod(), context);
                 } else {
+                    onDiscoveredField(field);
                     posQuery = newTermQuery(new Term(field, plist.get(0)), BoostAttribute.DEFAULT_BOOST);
                 }
             } else if (isLastPos == false) {
                 // build a synonym query for terms in the same position.
                 SynonymQuery.Builder sb = new SynonymQuery.Builder(field);
                 for (String synonym : plist) {
+                    onDiscoveredField(field);
                     sb.addTerm(new Term(field, synonym));
                 }
                 posQuery = sb.build();
             } else {
                 List<BooleanClause> innerClauses = new ArrayList<>();
                 for (String token : plist) {
+                    onDiscoveredField(field);
                     innerClauses.add(new BooleanClause(super.getPrefixQuery(field, token), BooleanClause.Occur.SHOULD));
                 }
                 posQuery = getBooleanQuery(innerClauses);
@@ -674,6 +706,9 @@ private Query existsQuery(String fieldName) {
         if (fieldNamesFieldType == null) {
             return new MatchNoDocsQuery("No mappings yet");
         }
+
+        onDiscoveredField(fieldName);
+
         if (fieldNamesFieldType.isEnabled() == false) {
             // The field_names_field is disabled so we switch to a wildcard query that matches all terms
             return new WildcardQuery(new Term(fieldName, "*"));
@@ -699,6 +734,7 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseExcep
         }
         List<Query> queries = new ArrayList<>();
         for (Map.Entry<String, Float> entry : fields.entrySet()) {
+            onDiscoveredField(entry.getKey());
             Query q = getWildcardQuerySingle(entry.getKey(), termStr);
             assert q != null;
             queries.add(applyBoost(q, entry.getValue()));
@@ -724,6 +760,7 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
         }
         if (forceAnalyzer != null && (analyzeWildcard || currentFieldType.getTextSearchInfo().isTokenized())) {
             setAnalyzer(forceAnalyzer);
+            onDiscoveredField(currentFieldType.name());
             return super.getWildcardQuery(currentFieldType.name(), termStr);
         }
         if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
@@ -771,6 +808,7 @@ protected Query getRegexpQuery(String field, String termStr) throws ParseExcepti
         for (Map.Entry<String, Float> entry : fields.entrySet()) {
             Query q = getRegexpQuerySingle(entry.getKey(), termStr);
             assert q != null;
+            onDiscoveredField(entry.getKey());
             queries.add(applyBoost(q, entry.getValue()));
         }
         if (queries.size() == 1) {
@@ -789,6 +827,7 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseExc
                 return newUnmappedFieldQuery(field);
             }
             setAnalyzer(getSearchAnalyzer(currentFieldType));
+            onDiscoveredField(field);
             return super.getRegexpQuery(field, termStr);
         } catch (RuntimeException e) {
             if (lenient) {
@@ -809,6 +848,10 @@ protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseExcepti
         return fixNegativeQueryIfNeeded(q);
     }
 
+    public Set<String> getDiscoveredQueryFields() {
+        return this.discoveredQueryFields;
+    }
+
     private Query applySlop(Query q, int slop) {
         if (q instanceof PhraseQuery) {
             // make sure that the boost hasn't been set beforehand, otherwise we'd lose it
diff --git a/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java
index ad39cd831a30c..f7cb8aa943418 100644
--- a/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java
+++ b/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java
@@ -32,6 +32,7 @@
 
 package org.opensearch.index.query;
 
+import java.util.Set;
 import org.apache.lucene.tests.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.SortedNumericDocValuesField;
@@ -427,6 +428,12 @@ public void testTimezoneEquals() {
         assertNotEquals(builder1, builder2);
     }
 
+    public void testExtractAllUsedFields() {
+        Set<String> allUsedFields = queryStringQuery("test").defaultField(TEXT_FIELD_NAME + "*").extractAllUsedFields(createShardContext());
+        assertTrue(allUsedFields.contains(TEXT_FIELD_NAME));
+        assertTrue(allUsedFields.contains(KEYWORD_FIELD_NAME));
+    }
+
     public void testIllegalArguments() {
         expectThrows(IllegalArgumentException.class, () -> new QueryStringQueryBuilder((String) null));
     }
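
Usage sketch (illustrative, not part of the patch): the new extractAllUsedFields method parses the query once with the same QueryStringQueryParser setup as doToQuery and returns the concrete field names the parser resolved, with wildcard patterns filtered out by onDiscoveredField, or null when the parsed query is null. A minimal caller might look like the following, assuming a QueryShardContext obtained from the active shard search; the helper class name and the "title*"/"body" field names are hypothetical.

    import java.util.Collections;
    import java.util.Set;

    import org.opensearch.index.query.QueryBuilders;
    import org.opensearch.index.query.QueryShardContext;
    import org.opensearch.index.query.QueryStringQueryBuilder;

    /** Illustrative helper only; not part of this change. */
    final class UsedFieldsExample {

        /**
         * Resolves which mapped fields a query_string query will actually touch.
         * Returns an empty set when extractAllUsedFields reports a null parsed query.
         */
        static Set<String> usedFields(String queryString, QueryShardContext context) {
            QueryStringQueryBuilder builder = QueryBuilders.queryStringQuery(queryString)
                .field("title*")   // hypothetical wildcard pattern, expanded against the mappings
                .field("body");    // hypothetical concrete field
            Set<String> fields = builder.extractAllUsedFields(context);
            return fields == null ? Collections.emptySet() : fields;
        }
    }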