Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
Signed-off-by: Petar Dzepina <[email protected]>
  • Loading branch information
petardz committed Jan 26, 2023
1 parent f9eb9bf commit 4727bb2
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

package org.opensearch.index.query;

import java.util.Set;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -161,6 +162,8 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder<QueryStringQue

private ZoneId timeZone;

List<String> discoveredFields;

/** To limit effort spent determinizing regexp queries. */
private int maxDeterminizedStates = DEFAULT_DETERMINIZE_WORK_LIMIT;

Expand Down Expand Up @@ -872,13 +875,7 @@ protected int doHashCode() {
);
}

@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
String rewrittenQueryString = escape ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString) : queryString;
if (fieldsAndWeights.size() > 0 && this.defaultField != null) {
throw addValidationError("cannot use [fields] parameter in conjunction with [default_field]", null);
}

private QueryStringQueryParser newQueryParser(QueryShardContext context) {
QueryStringQueryParser queryParser;
boolean isLenient = lenient == null ? context.queryStringLenient() : lenient;
if (defaultField != null) {
Expand Down Expand Up @@ -946,6 +943,39 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
queryParser.setDeterminizeWorkLimit(maxDeterminizedStates);
queryParser.setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
queryParser.setFuzzyTranspositions(fuzzyTranspositions);
return queryParser;
}

/**
 * Parses the query string (without executing it) and reports every concrete field
 * name the resulting query would touch.
 *
 * <p>Wildcard field patterns are expanded/filtered by the parser itself, so the
 * returned set contains only resolved field names (see
 * {@code QueryStringQueryParser#getDiscoveredQueryFields()}).
 *
 * @param context the shard context used to resolve field mappings
 * @return the set of discovered field names; empty if the query parses to {@code null}
 * @throws QueryShardException if the query string cannot be parsed
 */
public Set<String> extractAllUsedFields(QueryShardContext context) {
    // Same pre-processing/validation as doToQuery(): optional Lucene escaping,
    // and [fields] vs [default_field] are mutually exclusive.
    String rewrittenQueryString = escape ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString) : queryString;
    if (fieldsAndWeights.size() > 0 && this.defaultField != null) {
        throw addValidationError("cannot use [fields] parameter in conjunction with [default_field]", null);
    }

    QueryStringQueryParser queryParser = newQueryParser(context);

    Query query;
    try {
        query = queryParser.parse(rewrittenQueryString);
    } catch (org.apache.lucene.queryparser.classic.ParseException e) {
        throw new QueryShardException(context, "Failed to parse query [" + this.queryString + "]", e);
    }

    if (query == null) {
        // Prefer an empty set over null so callers can iterate without a null check.
        return Set.of();
    }

    return queryParser.getDiscoveredQueryFields();
}

@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
String rewrittenQueryString = escape ? org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString) : queryString;
if (fieldsAndWeights.size() > 0 && this.defaultField != null) {
throw addValidationError("cannot use [fields] parameter in conjunction with [default_field]", null);
}

QueryStringQueryParser queryParser = newQueryParser(context);

Query query;
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@

package org.opensearch.index.search;

import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
Expand Down Expand Up @@ -113,6 +115,8 @@ public class QueryStringQueryParser extends XQueryParser {
private MultiTermQuery.RewriteMethod fuzzyRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE;
private boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions;

private Set<String> discoveredQueryFields = new HashSet<>();

/**
* @param context The query shard context.
* @param defaultField The default field for query terms.
Expand Down Expand Up @@ -327,6 +331,8 @@ public Query getFieldQuery(String field, String queryText, boolean quoted) throw
return getFieldQuery(field, queryText, getPhraseSlop());
}

onDiscoveredField(field);

// Detects additional operators '<', '<=', '>', '>=' to handle range query with one side unbounded.
// It is required to use a prefix field operator to enable the detection since they are not treated
// as logical operator by the query parser (e.g. age:>=10).
Expand Down Expand Up @@ -361,6 +367,9 @@ public Query getFieldQuery(String field, String queryText, boolean quoted) throw
// if there is no match in the mappings.
return newUnmappedFieldQuery(field);
}
for (Map.Entry<String, Float> entry : fields.entrySet()) {
onDiscoveredField(entry.getKey());
}
Analyzer oldAnalyzer = queryBuilder.analyzer;
try {
if (forceAnalyzer != null) {
Expand All @@ -374,6 +383,13 @@ public Query getFieldQuery(String field, String queryText, boolean quoted) throw
}
}

/**
 * Records {@code field} as used by the query being parsed.
 *
 * <p>Null fields and simple wildcard patterns (e.g. {@code "name*"}) are ignored:
 * only concrete, resolved field names are tracked.
 *
 * @param field candidate field name; may be {@code null}
 */
private void onDiscoveredField(String field) {
    boolean trackable = field != null && Regex.isSimpleMatchPattern(field) == false;
    if (trackable) {
        discoveredQueryFields.add(field);
    }
}

@Override
protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException {
if (field != null && EXISTS_FIELD.equals(field)) {
Expand All @@ -384,6 +400,9 @@ protected Query getFieldQuery(String field, String queryText, int slop) throws P
if (fields.isEmpty()) {
return newUnmappedFieldQuery(field);
}
for (Map.Entry<String, Float> entry : fields.entrySet()) {
onDiscoveredField(entry.getKey());
}
Analyzer oldAnalyzer = queryBuilder.analyzer;
int oldSlop = queryBuilder.phraseSlop;
try {
Expand Down Expand Up @@ -423,6 +442,7 @@ protected Query getRangeQuery(String field, String part1, String part2, boolean

List<Query> queries = new ArrayList<>();
for (Map.Entry<String, Float> entry : fields.entrySet()) {
onDiscoveredField(entry.getKey());
Query q = getRangeQuerySingle(entry.getKey(), part1, part2, startInclusive, endInclusive, context);
assert q != null;
queries.add(applyBoost(q, entry.getValue()));
Expand All @@ -448,6 +468,7 @@ private Query getRangeQuerySingle(
return newUnmappedFieldQuery(field);
}
try {
onDiscoveredField(currentFieldType.name());
Analyzer normalizer = forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer;
BytesRef part1Binary = part1 == null ? null : normalizer.normalize(field, part1);
BytesRef part2Binary = part2 == null ? null : normalizer.normalize(field, part2);
Expand All @@ -472,6 +493,7 @@ private Query getRangeQuerySingle(

@Override
protected Query handleBareFuzzy(String field, Token fuzzySlop, String termImage) throws ParseException {
onDiscoveredField(field);
if (fuzzySlop.image.length() == 1) {
return getFuzzyQuery(field, termImage, fuzziness.asDistance(termImage));
}
Expand All @@ -487,6 +509,7 @@ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
}
List<Query> queries = new ArrayList<>();
for (Map.Entry<String, Float> entry : fields.entrySet()) {
onDiscoveredField(entry.getKey());
Query q = getFuzzyQuerySingle(entry.getKey(), termStr, minSimilarity);
assert q != null;
queries.add(applyBoost(q, entry.getValue()));
Expand All @@ -506,6 +529,7 @@ private Query getFuzzyQuerySingle(String field, String termStr, float minSimilar
return newUnmappedFieldQuery(field);
}
try {
onDiscoveredField(field);
Analyzer normalizer = forceAnalyzer == null ? queryBuilder.context.getSearchAnalyzer(currentFieldType) : forceAnalyzer;
BytesRef term = termStr == null ? null : normalizer.normalize(field, termStr);
return currentFieldType.fuzzyQuery(
Expand All @@ -526,6 +550,7 @@ private Query getFuzzyQuerySingle(String field, String termStr, float minSimilar

@Override
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
onDiscoveredField(term.field());
int numEdits = Fuzziness.build(minimumSimilarity).asDistance(term.text());
if (fuzzyRewriteMethod != null) {
return new FuzzyQuery(term, numEdits, prefixLength, fuzzyMaxExpansions, fuzzyTranspositions, fuzzyRewriteMethod);
Expand All @@ -542,6 +567,7 @@ protected Query getPrefixQuery(String field, String termStr) throws ParseExcepti
}
List<Query> queries = new ArrayList<>();
for (Map.Entry<String, Float> entry : fields.entrySet()) {
onDiscoveredField(entry.getKey());
Query q = getPrefixQuerySingle(entry.getKey(), termStr);
if (q != null) {
queries.add(applyBoost(q, entry.getValue()));
Expand All @@ -564,6 +590,7 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseExc
if (currentFieldType == null || currentFieldType.getTextSearchInfo() == TextSearchInfo.NONE) {
return newUnmappedFieldQuery(field);
}
onDiscoveredField(field);
setAnalyzer(getSearchAnalyzer(currentFieldType));
Query query = null;
if (currentFieldType.getTextSearchInfo().isTokenized() == false) {
Expand Down Expand Up @@ -598,6 +625,7 @@ private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr, Mappe
source = getAnalyzer().tokenStream(field, termStr);
source.reset();
} catch (IOException e) {
onDiscoveredField(field);
return super.getPrefixQuery(field, termStr);
}
tlist = new ArrayList<>();
Expand Down Expand Up @@ -642,20 +670,24 @@ private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr, Mappe
Query posQuery;
if (plist.size() == 1) {
if (isLastPos) {
onDiscoveredField(currentFieldType.name());
posQuery = currentFieldType.prefixQuery(plist.get(0), getMultiTermRewriteMethod(), context);
} else {
onDiscoveredField(field);
posQuery = newTermQuery(new Term(field, plist.get(0)), BoostAttribute.DEFAULT_BOOST);
}
} else if (isLastPos == false) {
// build a synonym query for terms in the same position.
SynonymQuery.Builder sb = new SynonymQuery.Builder(field);
for (String synonym : plist) {
onDiscoveredField(field);
sb.addTerm(new Term(field, synonym));
}
posQuery = sb.build();
} else {
List<BooleanClause> innerClauses = new ArrayList<>();
for (String token : plist) {
onDiscoveredField(field);
innerClauses.add(new BooleanClause(super.getPrefixQuery(field, token), BooleanClause.Occur.SHOULD));
}
posQuery = getBooleanQuery(innerClauses);
Expand All @@ -674,6 +706,9 @@ private Query existsQuery(String fieldName) {
if (fieldNamesFieldType == null) {
return new MatchNoDocsQuery("No mappings yet");
}

onDiscoveredField(fieldName);

if (fieldNamesFieldType.isEnabled() == false) {
// The field_names_field is disabled so we switch to a wildcard query that matches all terms
return new WildcardQuery(new Term(fieldName, "*"));
Expand All @@ -699,6 +734,7 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseExcep
}
List<Query> queries = new ArrayList<>();
for (Map.Entry<String, Float> entry : fields.entrySet()) {
onDiscoveredField(entry.getKey());
Query q = getWildcardQuerySingle(entry.getKey(), termStr);
assert q != null;
queries.add(applyBoost(q, entry.getValue()));
Expand All @@ -724,6 +760,7 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE
}
if (forceAnalyzer != null && (analyzeWildcard || currentFieldType.getTextSearchInfo().isTokenized())) {
setAnalyzer(forceAnalyzer);
onDiscoveredField(currentFieldType.name());
return super.getWildcardQuery(currentFieldType.name(), termStr);
}
if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) {
Expand Down Expand Up @@ -771,6 +808,7 @@ protected Query getRegexpQuery(String field, String termStr) throws ParseExcepti
for (Map.Entry<String, Float> entry : fields.entrySet()) {
Query q = getRegexpQuerySingle(entry.getKey(), termStr);
assert q != null;
onDiscoveredField(entry.getKey());
queries.add(applyBoost(q, entry.getValue()));
}
if (queries.size() == 1) {
Expand All @@ -789,6 +827,7 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseExc
return newUnmappedFieldQuery(field);
}
setAnalyzer(getSearchAnalyzer(currentFieldType));
onDiscoveredField(field);
return super.getRegexpQuery(field, termStr);
} catch (RuntimeException e) {
if (lenient) {
Expand All @@ -809,6 +848,10 @@ protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseExcepti
return fixNegativeQueryIfNeeded(q);
}

/**
 * Returns the concrete field names discovered while parsing the query.
 *
 * @return an immutable snapshot of the discovered field names; never {@code null}
 */
public Set<String> getDiscoveredQueryFields() {
    // Return a defensive immutable copy so callers cannot mutate parser state.
    return Set.copyOf(this.discoveredQueryFields);
}

private Query applySlop(Query q, int slop) {
if (q instanceof PhraseQuery) {
// make sure that the boost hasn't been set beforehand, otherwise we'd lose it
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

package org.opensearch.index.query;

import java.util.Set;
import org.apache.lucene.tests.analysis.MockSynonymAnalyzer;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
Expand Down Expand Up @@ -427,6 +428,12 @@ public void testTimezoneEquals() {
assertNotEquals(builder1, builder2);
}

/**
 * Verifies that a wildcard default field is expanded to the concrete
 * mapped fields it matches.
 */
public void testExtractAllUsedFields() {
    QueryStringQueryBuilder builder = queryStringQuery("test").defaultField(TEXT_FIELD_NAME + "*");
    Set<String> allUsedFields = builder.extractAllUsedFields(createShardContext());
    assertTrue(allUsedFields.contains(TEXT_FIELD_NAME));
    assertTrue(allUsedFields.contains(KEYWORD_FIELD_NAME));
}

public void testIllegalArguments() {
expectThrows(IllegalArgumentException.class, () -> new QueryStringQueryBuilder((String) null));
}
Expand Down

0 comments on commit 4727bb2

Please sign in to comment.