Skip to content

Commit

Permalink
Add support for index alias with filter
Browse files Browse the repository at this point in the history
Signed-off-by: Martin Gaievski <[email protected]>
  • Loading branch information
martin-gaievski committed Apr 4, 2024
1 parent cc6a6b2 commit c23b638
Show file tree
Hide file tree
Showing 8 changed files with 306 additions and 93 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Enhancements
### Bug Fixes
- Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
- Fixed exception when running hybrid query on index alias with filter ([#670](https://github.com/opensearch-project/neural-search/pull/670))
### Infrastructure
### Documentation
### Maintenance
Expand Down
24 changes: 22 additions & 2 deletions src/main/java/org/opensearch/neuralsearch/query/HybridQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,32 @@ public final class HybridQuery extends Query implements Iterable<Query> {

private final List<Query> subQueries;

public HybridQuery(Collection<Query> subQueries) {
/**
* Create new instance of hybrid query object based on collection of sub queries and filter query
* @param subQueries collection of queries that are executed individually and contribute to a final list of combined scores
* @param filterQuery filter that will be applied to each sub query. If this is null sub queries will be executed as is
*/
public HybridQuery(final Collection<Query> subQueries, final Query filterQuery) {
Objects.requireNonNull(subQueries, "collection of queries must not be null");
if (subQueries.isEmpty()) {
throw new IllegalArgumentException("collection of queries must not be empty");
}
this.subQueries = new ArrayList<>(subQueries);
if (Objects.isNull(filterQuery)) {
this.subQueries = new ArrayList<>(subQueries);
} else {
List<Query> modifiedSubQueries = new ArrayList<>();
for (Query subQuery : subQueries) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(subQuery, BooleanClause.Occur.MUST);
builder.add(filterQuery, BooleanClause.Occur.FILTER);
modifiedSubQueries.add(builder.build());
}
this.subQueries = modifiedSubQueries;
}
}

public HybridQuery(final Collection<Query> subQueries) {
this(subQueries, null);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.apache.lucene.search.Query;
import org.opensearch.common.settings.Settings;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.search.NestedHelper;
import org.opensearch.neuralsearch.query.HybridQuery;
import org.opensearch.search.aggregations.AggregationProcessor;
import org.opensearch.search.internal.ContextIndexSearcher;
Expand All @@ -25,6 +24,8 @@

import lombok.extern.log4j.Log4j2;

import static org.opensearch.neuralsearch.util.HybridQueryUtil.hasAliasFilter;
import static org.opensearch.neuralsearch.util.HybridQueryUtil.hasNestedFieldOrNestedDocs;
import static org.opensearch.neuralsearch.util.HybridQueryUtil.isHybridQuery;

/**
Expand All @@ -51,10 +52,6 @@ public boolean searchWith(
}
}

private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query);
}

private static boolean isWrappedHybridQuery(final Query query) {
return query instanceof BooleanQuery
&& ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.getQuery() instanceof HybridQuery);
Expand All @@ -64,13 +61,21 @@ private static boolean isWrappedHybridQuery(final Query query) {
protected Query extractHybridQuery(final SearchContext searchContext, final Query query) {
if (hasNestedFieldOrNestedDocs(query, searchContext)
&& isWrappedHybridQuery(query)
&& ((BooleanQuery) query).clauses().size() > 0) {
&& !((BooleanQuery) query).clauses().isEmpty()) {
// extract hybrid query and replace bool with hybrid query
List<BooleanClause> booleanClauses = ((BooleanQuery) query).clauses();
if (booleanClauses.isEmpty() || booleanClauses.get(0).getQuery() instanceof HybridQuery == false) {
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level bool query");
}
return booleanClauses.get(0).getQuery();
} else if (hasAliasFilter(query, searchContext) && isWrappedHybridQuery(query) && !((BooleanQuery) query).clauses().isEmpty()) {
List<BooleanClause> booleanClauses = ((BooleanQuery) query).clauses();
if (!(booleanClauses.get(0).getQuery() instanceof HybridQuery)) {
throw new IllegalStateException("cannot process hybrid query due to incorrect structure of top level query");

Check warning on line 74 in src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java#L74

Added line #L74 was not covered by tests
}
HybridQuery hybridQuery = (HybridQuery) booleanClauses.get(0).getQuery();
HybridQuery hybridQueryWithFilter = new HybridQuery(hybridQuery.getSubQueries(), searchContext.aliasFilter());
return hybridQueryWithFilter;
}
return query;
}
Expand Down
70 changes: 41 additions & 29 deletions src/main/java/org/opensearch/neuralsearch/util/HybridQueryUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import org.opensearch.neuralsearch.query.HybridQuery;
import org.opensearch.search.internal.SearchContext;

import java.util.Objects;

/**
* Utility class for anything related to hybrid query
*/
Expand All @@ -24,48 +26,58 @@ public class HybridQueryUtil {
public static boolean isHybridQuery(final Query query, final SearchContext searchContext) {
if (query instanceof HybridQuery) {
return true;
} else if (isWrappedHybridQuery(query) && hasNestedFieldOrNestedDocs(query, searchContext)) {
/* Checking if this is a hybrid query that is wrapped into a Bool query by core Opensearch code
https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/DefaultSearchContext.java#L367-L370.
main reason for that is performance optimization, at time of writing we are ok with loosing on performance if that's unblocks
hybrid query for indexes with nested field types.
in such case we consider query a valid hybrid query. Later in the code we will extract it and execute as a main query for
this search request.
below is sample structure of such query:
} else if (isWrappedHybridQuery(query)) {
if ((hasNestedFieldOrNestedDocs(query, searchContext))) {
/* Checking if this is a hybrid query that is wrapped into a Bool query by core Opensearch code
https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/DefaultSearchContext.java#L367-L370.
main reason for that is performance optimization, at time of writing we are ok with loosing on performance if that's unblocks
hybrid query for indexes with nested field types.
in such case we consider query a valid hybrid query. Later in the code we will extract it and execute as a main query for
this search request.
below is sample structure of such query:
Boolean {
should: {
hybrid: {
sub_query1 {}
sub_query2 {}
Boolean {
should: {
hybrid: {
sub_query1 {}
sub_query2 {}
}
}
}
filter: {
exists: {
field: "_primary_term"
filter: {
exists: {
field: "_primary_term"
}
}
}
}
TODO Need to add logic for passing hybrid sub-queries through the same logic in core to ensure there is no latency regression */
// we have already checked if query in instance of Boolean in higher level else if condition
return ((BooleanQuery) query).clauses()
.stream()
.filter(clause -> clause.getQuery() instanceof HybridQuery == false)
.allMatch(
clause -> clause.getOccur() == BooleanClause.Occur.FILTER
&& clause.getQuery() instanceof FieldExistsQuery
&& SeqNoFieldMapper.PRIMARY_TERM_NAME.equals(((FieldExistsQuery) clause.getQuery()).getField())
);
} else if (hasAliasFilter(query, searchContext)) {
// Checking case with alias that has a filter
return true;
}
TODO Need to add logic for passing hybrid sub-queries through the same logic in core to ensure there is no latency regression */
// we have already checked if query in instance of Boolean in higher level else if condition
return ((BooleanQuery) query).clauses()
.stream()
.filter(clause -> clause.getQuery() instanceof HybridQuery == false)
.allMatch(clause -> {
return clause.getOccur() == BooleanClause.Occur.FILTER
&& clause.getQuery() instanceof FieldExistsQuery
&& SeqNoFieldMapper.PRIMARY_TERM_NAME.equals(((FieldExistsQuery) clause.getQuery()).getField());
});
return false;
}
return false;
}

private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
public static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) {
return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query);
}

private static boolean isWrappedHybridQuery(final Query query) {
return query instanceof BooleanQuery
&& ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.getQuery() instanceof HybridQuery);
}

public static boolean hasAliasFilter(final Query query, final SearchContext searchContext) {
return Objects.nonNull(searchContext.aliasFilter());
}
}
73 changes: 55 additions & 18 deletions src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.opensearch.index.query.BoolQueryBuilder;
import org.opensearch.index.query.MatchQueryBuilder;
import org.opensearch.index.query.NestedQueryBuilder;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.index.query.RangeQueryBuilder;
import org.opensearch.index.query.TermQueryBuilder;
Expand Down Expand Up @@ -129,10 +130,8 @@ protected boolean preserveClusterUponCompletion() {
*/
@SneakyThrows
public void testComplexQuery_whenMultipleSubqueries_thenSuccessful() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
TermQueryBuilder termQueryBuilder1 = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3);
TermQueryBuilder termQueryBuilder2 = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT4);
Expand Down Expand Up @@ -173,7 +172,7 @@ public void testComplexQuery_whenMultipleSubqueries_thenSuccessful() {
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
wipeOfTestResources(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, null, null, SEARCH_PIPELINE);
}
}

Expand Down Expand Up @@ -205,10 +204,8 @@ public void testComplexQuery_whenMultipleSubqueries_thenSuccessful() {
*/
@SneakyThrows
public void testComplexQuery_whenMultipleIdenticalSubQueries_thenSuccessful() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
TermQueryBuilder termQueryBuilder1 = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3);
TermQueryBuilder termQueryBuilder2 = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT4);
Expand Down Expand Up @@ -248,16 +245,14 @@ public void testComplexQuery_whenMultipleIdenticalSubQueries_thenSuccessful() {
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
wipeOfTestResources(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_BASIC_VECTOR_DOC_FIELD_INDEX_NAME, null, null, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testNoMatchResults_whenOnlyTermSubQueryWithoutMatch_thenEmptyResult() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT);
TermQueryBuilder termQuery2Builder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT2);
Expand All @@ -283,16 +278,14 @@ public void testNoMatchResults_whenOnlyTermSubQueryWithoutMatch_thenEmptyResult(
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, null, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testNestedQuery_whenHybridQueryIsWrappedIntoOtherQuery_thenFail() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3);
MatchQueryBuilder matchQuery2Builder = QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT4);
Expand Down Expand Up @@ -336,16 +329,14 @@ public void testNestedQuery_whenHybridQueryIsWrappedIntoOtherQuery_thenFail() {
)
);
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD, null, null, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testIndexWithNestedFields_whenHybridQuery_thenSuccess() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_WITH_NESTED_TYPE_NAME_ONE_SHARD);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3);
TermQueryBuilder termQuery2Builder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT2);
Expand All @@ -371,16 +362,14 @@ public void testIndexWithNestedFields_whenHybridQuery_thenSuccess() {
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_WITH_NESTED_TYPE_NAME_ONE_SHARD, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_WITH_NESTED_TYPE_NAME_ONE_SHARD, null, null, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testIndexWithNestedFields_whenHybridQueryIncludesNested_thenSuccess() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_WITH_NESTED_TYPE_NAME_ONE_SHARD);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT);
NestedQueryBuilder nestedQueryBuilder = QueryBuilders.nestedQuery(
Expand Down Expand Up @@ -410,7 +399,7 @@ public void testIndexWithNestedFields_whenHybridQueryIncludesNested_thenSuccess(
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_WITH_NESTED_TYPE_NAME_ONE_SHARD, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_WITH_NESTED_TYPE_NAME_ONE_SHARD, null, null, SEARCH_PIPELINE);
}
}

Expand Down Expand Up @@ -578,6 +567,54 @@ public void testRequestCache_whenMultipleShardsQueryReturnResults_thenSuccessful
}
}

@SneakyThrows
public void testIndexAlias_whenHybridQueryAndIndexAliasHasFilter_thenSuccess() {
String modelId = null;
String alias = "alias_with_filter";
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
// create alias for index
QueryBuilder aliasFilter = QueryBuilders.boolQuery()
.mustNot(QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
createIndexAlias(TEST_MULTI_DOC_INDEX_NAME, alias, aliasFilter);

NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
TEST_KNN_VECTOR_FIELD_NAME_1,
TEST_QUERY_TEXT,
"",
modelId,
5,
null,
null
);
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(neuralQueryBuilder);

Map<String, Object> searchResponseAsMap = search(
alias,
hybridQueryBuilder,
null,
10,
Map.of("search_pipeline", SEARCH_PIPELINE)
);

assertEquals(2, getHitCount(searchResponseAsMap));
assertTrue(getMaxScore(searchResponseAsMap).isPresent());
assertEquals(1.0f, getMaxScore(searchResponseAsMap).get(), DELTA_FOR_SCORE_ASSERTION);

Map<String, Object> total = getTotalHits(searchResponseAsMap);
assertNotNull(total.get("value"));
assertEquals(2, total.get("value"));
assertNotNull(total.get("relation"));
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
deleteIndexAlias(TEST_MULTI_DOC_INDEX_NAME, alias);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
}
}

@SneakyThrows
private void initializeIndexIfNotExist(String indexName) throws IOException {
if (TEST_BASIC_INDEX_NAME.equals(indexName) && !indexExists(TEST_BASIC_INDEX_NAME)) {
Expand Down
Loading

0 comments on commit c23b638

Please sign in to comment.