Skip to content

Commit

Permalink
Fixed scenario when both nested fields and alias filter are present
Browse files Browse the repository at this point in the history
Signed-off-by: Martin Gaievski <[email protected]>
  • Loading branch information
martin-gaievski committed Apr 5, 2024
1 parent fce70fb commit 5a6669e
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 54 deletions.
10 changes: 5 additions & 5 deletions src/main/java/org/opensearch/neuralsearch/query/HybridQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,29 +32,29 @@ public final class HybridQuery extends Query implements Iterable<Query> {
/**
* Create new instance of hybrid query object based on collection of sub queries and filter query
* @param subQueries collection of queries that are executed individually and contribute to a final list of combined scores
* @param filterQuery filter that will be applied to each sub query. If this is null sub queries will be executed as is
* @param filterQueries list of filters that will be applied to each sub query. Each filter from the list is added as bool "filter" clause. If this is null sub queries will be executed as is
*/
public HybridQuery(final Collection<Query> subQueries, final Query filterQuery) {
public HybridQuery(final Collection<Query> subQueries, final List<Query> filterQueries) {
Objects.requireNonNull(subQueries, "collection of queries must not be null");
if (subQueries.isEmpty()) {
throw new IllegalArgumentException("collection of queries must not be empty");
}
if (Objects.isNull(filterQuery)) {
if (Objects.isNull(filterQueries) || filterQueries.isEmpty()) {
this.subQueries = new ArrayList<>(subQueries);
} else {
List<Query> modifiedSubQueries = new ArrayList<>();
for (Query subQuery : subQueries) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(subQuery, BooleanClause.Occur.MUST);
builder.add(filterQuery, BooleanClause.Occur.FILTER);
filterQueries.forEach(filterQuery -> builder.add(filterQuery, BooleanClause.Occur.FILTER));
modifiedSubQueries.add(builder.build());
}
this.subQueries = modifiedSubQueries;
}
}

public HybridQuery(final Collection<Query> subQueries) {
this(subQueries, null);
this(subQueries, List.of());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;

import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.search.BooleanClause;
Expand Down Expand Up @@ -59,22 +60,20 @@ private static boolean isWrappedHybridQuery(final Query query) {

@VisibleForTesting
protected Query extractHybridQuery(final SearchContext searchContext, final Query query) {
if (hasNestedFieldOrNestedDocs(query, searchContext)
if ((hasAliasFilter(query, searchContext) || hasNestedFieldOrNestedDocs(query, searchContext))
&& isWrappedHybridQuery(query)
&& !((BooleanQuery) query).clauses().isEmpty()) {
// extract hybrid query and replace bool with hybrid query
List<BooleanClause> booleanClauses = ((BooleanQuery) query).clauses();
if (booleanClauses.isEmpty() || booleanClauses.get(0).getQuery() instanceof HybridQuery == false) {
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level bool query");
}
return booleanClauses.get(0).getQuery();
} else if (hasAliasFilter(query, searchContext) && isWrappedHybridQuery(query) && !((BooleanQuery) query).clauses().isEmpty()) {
List<BooleanClause> booleanClauses = ((BooleanQuery) query).clauses();
if (!(booleanClauses.get(0).getQuery() instanceof HybridQuery)) {
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level query");
}
HybridQuery hybridQuery = (HybridQuery) booleanClauses.get(0).getQuery();
HybridQuery hybridQueryWithFilter = new HybridQuery(hybridQuery.getSubQueries(), searchContext.aliasFilter());
HybridQuery hybridQuery = (HybridQuery) booleanClauses.stream().findFirst().get().getQuery();
List<Query> filterQueries = booleanClauses.stream()
.skip(1)
.filter(clause -> BooleanClause.Occur.FILTER == clause.getOccur())
.map(BooleanClause::getQuery)
.collect(Collectors.toList());
HybridQuery hybridQueryWithFilter = new HybridQuery(hybridQuery.getSubQueries(), filterQueries);
return hybridQueryWithFilter;
}
return query;
Expand Down
44 changes: 14 additions & 30 deletions src/main/java/org/opensearch/neuralsearch/util/HybridQueryUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@

import lombok.AccessLevel;
import lombok.NoArgsConstructor;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.Query;
import org.opensearch.index.mapper.SeqNoFieldMapper;
import org.opensearch.index.search.NestedHelper;
import org.opensearch.neuralsearch.query.HybridQuery;
import org.opensearch.search.internal.SearchContext;
Expand All @@ -27,43 +24,30 @@ public static boolean isHybridQuery(final Query query, final SearchContext searc
if (query instanceof HybridQuery) {
return true;
} else if (isWrappedHybridQuery(query)) {
if ((hasNestedFieldOrNestedDocs(query, searchContext))) {
/* Checking if this is a hybrid query that is wrapped into a Bool query by core Opensearch code
https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/DefaultSearchContext.java#L367-L370.
main reason for that is performance optimization, at time of writing we are ok with loosing on performance if that's unblocks
hybrid query for indexes with nested field types.
in such case we consider query a valid hybrid query. Later in the code we will extract it and execute as a main query for
this search request.
below is sample structure of such query:
/* Checking if this is a hybrid query that is wrapped into a Bool query by core Opensearch code
https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/DefaultSearchContext.java#L367-L370.
main reason for that is performance optimization, at time of writing we are ok with loosing on performance if that's unblocks
hybrid query for indexes with nested field types.
in such case we consider query a valid hybrid query. Later in the code we will extract it and execute as a main query for
this search request.
below is sample structure of such query:
Boolean {
should: {
Boolean {
should: {
hybrid: {
sub_query1 {}
sub_query2 {}
}
}
filter: {
}
filter: {
exists: {
field: "_primary_term"
}
}
}
TODO Need to add logic for passing hybrid sub-queries through the same logic in core to ensure there is no latency regression */
// we have already checked if query in instance of Boolean in higher level else if condition
return ((BooleanQuery) query).clauses()
.stream()
.filter(clause -> clause.getQuery() instanceof HybridQuery == false)
.allMatch(
clause -> clause.getOccur() == BooleanClause.Occur.FILTER
&& clause.getQuery() instanceof FieldExistsQuery
&& SeqNoFieldMapper.PRIMARY_TERM_NAME.equals(((FieldExistsQuery) clause.getQuery()).getField())
);
} else if (hasAliasFilter(query, searchContext)) {
// Checking case with alias that has a filter
return true;
}
return false;
*/
// we have already checked if query in instance of Boolean in higher level else if condition
return hasNestedFieldOrNestedDocs(query, searchContext) || hasAliasFilter(query, searchContext);
}
return false;
}
Expand Down
13 changes: 11 additions & 2 deletions src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -678,10 +678,19 @@ private void initializeIndexIfNotExist(String indexName) throws IOException {
}

if (TEST_MULTI_DOC_INDEX_NAME.equals(indexName) && !indexExists(TEST_MULTI_DOC_INDEX_NAME)) {
prepareKnnIndex(
createIndexWithConfiguration(
indexName,
buildIndexConfiguration(
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE)),
List.of(TEST_NESTED_TYPE_FIELD_NAME_1),
1
),
""
);
/*prepareKnnIndex(
TEST_MULTI_DOC_INDEX_NAME,
Collections.singletonList(new KNNFieldConfig(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DIMENSION, TEST_SPACE_TYPE))
);
);*/
addDocsToIndex(TEST_MULTI_DOC_INDEX_NAME);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ public void testFilter_whenSubQueriesWithFilterPassed_thenSuccessful() {
QueryBuilders.termQuery(TEXT_FIELD_NAME, TERM_QUERY_TEXT).toQuery(mockQueryShardContext),
QueryBuilders.termQuery(TEXT_FIELD_NAME, TERM_ANOTHER_QUERY_TEXT).toQuery(mockQueryShardContext)
),
filter
List.of(filter)
);
QueryUtils.check(hybridQuery);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.opensearch.index.mapper.SeqNoFieldMapper.PRIMARY_TERM_NAME;
import static org.opensearch.neuralsearch.search.util.HybridSearchResultFormatUtil.isHybridQueryStartStopElement;

import java.io.IOException;
Expand All @@ -25,7 +26,10 @@
import java.util.UUID;
import java.util.stream.Collectors;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
Expand Down Expand Up @@ -458,6 +462,7 @@ public void testWrappedHybridQuery_whenHybridWrappedIntoBool_thenFail() {
IndexMetadata indexMetadata = mock(IndexMetadata.class);
when(indexMetadata.getIndex()).thenReturn(new Index(TEST_INDEX, INDEX_UUID.toString()));
when(indexMetadata.getSettings()).thenReturn(Settings.EMPTY);
when(indexMetadata.getCustomData(eq(IndexMetadata.REMOTE_STORE_CUSTOM_KEY))).thenReturn(null);
Settings settings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, Integer.toString(1)).build();
IndexSettings indexSettings = new IndexSettings(indexMetadata, settings);
when(mockQueryShardContext.getIndexSettings()).thenReturn(indexSettings);
Expand Down Expand Up @@ -565,6 +570,7 @@ public void testWrappedHybridQuery_whenHybridWrappedIntoBoolAndIncorrectStructur
IndexMetadata indexMetadata = mock(IndexMetadata.class);
when(indexMetadata.getIndex()).thenReturn(new Index(TEST_INDEX, INDEX_UUID.toString()));
when(indexMetadata.getSettings()).thenReturn(Settings.EMPTY);
when(indexMetadata.getCustomData(eq(IndexMetadata.REMOTE_STORE_CUSTOM_KEY))).thenReturn(null);
Settings settings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, Integer.toString(1)).build();
IndexSettings indexSettings = new IndexSettings(indexMetadata, settings);
when(mockQueryShardContext.getIndexSettings()).thenReturn(indexSettings);
Expand Down Expand Up @@ -599,7 +605,7 @@ public void testWrappedHybridQuery_whenHybridWrappedIntoBoolAndIncorrectStructur

org.hamcrest.MatcherAssert.assertThat(
exception.getMessage(),
containsString("cannot process hybrid query due to incorrect structure of top level bool query")
containsString("cannot process hybrid query due to incorrect structure of top level query")
);

releaseResources(directory, w, reader);
Expand Down Expand Up @@ -629,6 +635,13 @@ public void testWrappedHybridQuery_whenHybridWrappedIntoBoolBecauseOfNested_then
when(mockQueryShardContext.fieldMapper(eq(TEXT_FIELD_NAME))).thenReturn(fieldType);
when(mockQueryShardContext.getMapperService()).thenReturn(mapperService);
when(mockQueryShardContext.simpleMatchToIndexNames(anyString())).thenReturn(Set.of(TEXT_FIELD_NAME));
IndexMetadata indexMetadata = mock(IndexMetadata.class);
when(indexMetadata.getIndex()).thenReturn(new Index(TEST_INDEX, INDEX_UUID.toString()));
when(indexMetadata.getSettings()).thenReturn(Settings.EMPTY);
when(indexMetadata.getCustomData(eq(IndexMetadata.REMOTE_STORE_CUSTOM_KEY))).thenReturn(null);
Settings settings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, Integer.toString(1)).build();
IndexSettings indexSettings = new IndexSettings(indexMetadata, settings);
when(mockQueryShardContext.getIndexSettings()).thenReturn(indexSettings);

Directory directory = newDirectory();
IndexWriter w = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));
Expand All @@ -640,10 +653,10 @@ public void testWrappedHybridQuery_whenHybridWrappedIntoBoolBecauseOfNested_then
int docId2 = RandomizedTest.randomInt();
int docId3 = RandomizedTest.randomInt();
int docId4 = RandomizedTest.randomInt();
w.addDocument(getDocument(TEXT_FIELD_NAME, docId1, TEST_DOC_TEXT1, ft));
w.addDocument(getDocument(TEXT_FIELD_NAME, docId2, TEST_DOC_TEXT2, ft));
w.addDocument(getDocument(TEXT_FIELD_NAME, docId3, TEST_DOC_TEXT3, ft));
w.addDocument(getDocument(TEXT_FIELD_NAME, docId4, TEST_DOC_TEXT4, ft));
w.addDocument(document(TEXT_FIELD_NAME, docId1, TEST_DOC_TEXT1, ft));
w.addDocument(document(TEXT_FIELD_NAME, docId2, TEST_DOC_TEXT2, ft));
w.addDocument(document(TEXT_FIELD_NAME, docId3, TEST_DOC_TEXT3, ft));
w.addDocument(document(TEXT_FIELD_NAME, docId4, TEST_DOC_TEXT4, ft));
w.commit();

IndexReader reader = DirectoryReader.open(w);
Expand Down Expand Up @@ -678,6 +691,7 @@ public void testWrappedHybridQuery_whenHybridWrappedIntoBoolBecauseOfNested_then
when(searchContext.indexShard()).thenReturn(indexShard);
when(searchContext.bucketCollectorProcessor()).thenReturn(SearchContext.NO_OP_BUCKET_COLLECTOR_PROCESSOR);
when(searchContext.mapperService()).thenReturn(mapperService);
when(searchContext.getQueryShardContext()).thenReturn(mockQueryShardContext);

LinkedList<QueryCollectorContext> collectors = new LinkedList<>();
boolean hasFilterCollector = randomBoolean();
Expand All @@ -689,7 +703,7 @@ public void testWrappedHybridQuery_whenHybridWrappedIntoBoolBecauseOfNested_then
queryBuilder.add(QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY_TEXT2));

BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(queryBuilder.toQuery(mockQueryShardContext), BooleanClause.Occur.SHOULD)
builder.add(queryBuilder.toQuery(mockQueryShardContext), BooleanClause.Occur.MUST)
.add(Queries.newNonNestedFilter(), BooleanClause.Occur.FILTER);
Query query = builder.build();

Expand Down Expand Up @@ -767,6 +781,7 @@ public void testBoolQuery_whenTooManyNestedLevels_thenSuccess() {
IndexMetadata indexMetadata = mock(IndexMetadata.class);
when(indexMetadata.getIndex()).thenReturn(new Index(TEST_INDEX, INDEX_UUID.toString()));
when(indexMetadata.getSettings()).thenReturn(Settings.EMPTY);
when(indexMetadata.getCustomData(eq(IndexMetadata.REMOTE_STORE_CUSTOM_KEY))).thenReturn(null);
Settings settings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, Integer.toString(1)).build();
IndexSettings indexSettings = new IndexSettings(indexMetadata, settings);
when(mockQueryShardContext.getIndexSettings()).thenReturn(indexSettings);
Expand Down Expand Up @@ -1003,4 +1018,12 @@ private BooleanQuery createNestedBoolQuery(final Query query1, final Query query
builder.add(createNestedBoolQuery(query1, query2, level - 1), BooleanClause.Occur.MUST);
return builder.build();
}

private static Document document(final String fieldName, int docId, final String fieldValue, final FieldType ft) {
Document doc = new Document();
doc.add(new TextField("id", Integer.toString(docId), Field.Store.YES));
doc.add(new Field(fieldName, fieldValue, ft));
doc.add(new NumericDocValuesField(PRIMARY_TERM_NAME, 0));
return doc;
}
}

0 comments on commit 5a6669e

Please sign in to comment.