Skip to content

Commit

Permalink
Add support for scored named queries (#11626)
Browse files Browse the repository at this point in the history
OpenSearch already supports labelling queries: the names of the queries that a hit matched are returned as a list in the
search results. However, one use case when doing hybrid search with query text and dense vectors is to determine the
individual score for each query type. This is very useful for further analysis and for building offline models that
generate better weights for the ranking score. Hence this change adds a feature that lets the client request the score
of each matched query.

---------

Signed-off-by: Dharin Shah <[email protected]>
Signed-off-by: Dharin Shah <[email protected]>
Co-authored-by: Dharin Shah <[email protected]>
(cherry picked from commit 52b27f4)
Signed-off-by: Andrew Ross <[email protected]>
  • Loading branch information
2 people authored and andrross committed Feb 22, 2024
1 parent 8e24648 commit ccbd26b
Show file tree
Hide file tree
Showing 23 changed files with 672 additions and 135 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Introduce query level setting `index.query.max_nested_depth` limiting nested queries ([#3268](https://github.com/opensearch-project/OpenSearch/issues/3268))
- Add toString methods to MultiSearchRequest, MultiGetRequest and CreateIndexRequest ([#12163](https://github.com/opensearch-project/OpenSearch/pull/12163))
- Fix error in RemoteSegmentStoreDirectory when debug logging is enabled ([#12328](https://github.com/opensearch-project/OpenSearch/pull/12328))
- Support for returning scores in matched queries ([#11626](https://github.com/opensearch-project/OpenSearch/pull/11626))

### Dependencies
- Bump `com.squareup.okio:okio` from 3.7.0 to 3.8.0 ([#12290](https://github.com/opensearch-project/OpenSearch/pull/12290))
Expand Down
1 change: 0 additions & 1 deletion release-notes/opensearch.release-notes-2.12.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
- Remove concurrent segment search feature flag for GA launch ([#12074](https://github.com/opensearch-project/OpenSearch/pull/12074))
- Enable Fuzzy codec for doc id fields using a bloom filter ([#11022](https://github.com/opensearch-project/OpenSearch/pull/11022))
- [Metrics Framework] Adds support for Histogram metric ([#12062](https://github.com/opensearch-project/OpenSearch/pull/12062))
- Support for returning scores in matched queries ([#11626](https://github.com/opensearch-project/OpenSearch/pull/11626))

### Dependencies
- Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,11 @@
"search_pipeline": {
"type": "string",
"description": "The search pipeline to use to execute this request"
},
"include_named_queries_score":{
"type": "boolean",
"description":"Indicates whether hit.matched_queries should be rendered as a map that includes the name of the matched query associated with its score (true) or as an array containing the name of the matched queries (false)",
"default":false
}
},
"body":{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
setup:
  # Scored named queries are not part of the 2.12 line, so skip through
  # 2.12.99: this also excludes any 2.12.x patch node in a mixed-version
  # cluster, not only 2.12.0 itself.
  - skip:
      version: " - 2.12.99"
      reason: "implemented for versions 2.13.0 and above"

---
"matched queries":
  # Default rendering: without include_named_queries_score, matched_queries is
  # an array holding the names of the queries each hit matched.
  - do:
      indices.create:
        # Create the index that the bulk and search requests below actually use.
        index: test_1

  - do:
      bulk:
        refresh: true
        body:
          - '{ "index" : { "_index" : "test_1", "_id" : "1" } }'
          - '{"field" : 1 }'
          - '{ "index" : { "_index" : "test_1", "_id" : "2" } }'
          - '{"field" : [1, 2] }'

  - do:
      search:
        index: test_1
        body:
          query:
            bool: {
              should: [
                {
                  match: {
                    field: {
                      query: 1,
                      _name: match_field_1
                    }
                  }
                },
                {
                  match: {
                    field: {
                      query: 2,
                      _name: match_field_2,
                      boost: 10
                    }
                  }
                }
              ]
            }

  - match: {hits.total.value: 2}
  # Hit 0 is expected to be doc 2: it matches both clauses, one of them boosted.
  - length: {hits.hits.0.matched_queries: 2}
  - match: {hits.hits.0.matched_queries: [ "match_field_1", "match_field_2" ]}
  # Hit 1 is expected to be doc 1, which only contains the value 1.
  - length: {hits.hits.1.matched_queries: 1}
  - match: {hits.hits.1.matched_queries: [ "match_field_1" ]}

---

"matched queries with scores":
  # With include_named_queries_score=true, matched_queries is rendered as a
  # map from query name to the score that query contributed for the hit.
  - do:
      indices.create:
        # Create the index that the bulk and search requests below actually use.
        index: test_1

  - do:
      bulk:
        refresh: true
        body:
          - '{ "index" : { "_index" : "test_1", "_id" : "1" } }'
          - '{"field" : 1 }'
          - '{ "index" : { "_index" : "test_1", "_id" : "2" } }'
          - '{"field" : [1, 2] }'

  - do:
      search:
        include_named_queries_score: true
        index: test_1
        body:
          query:
            bool: {
              should: [
                {
                  match: {
                    field: {
                      query: 1,
                      _name: match_field_1
                    }
                  }
                },
                {
                  match: {
                    field: {
                      query: 2,
                      _name: match_field_2,
                      boost: 10
                    }
                  }
                }
              ]
            }

  - match: { hits.total.value: 2 }
  # Hit 0 is expected to be doc 2: both named queries match, and the second
  # one carries boost 10.
  - length: { hits.hits.0.matched_queries: 2 }
  - match: { hits.hits.0.matched_queries.match_field_1: 1 }
  - match: { hits.hits.0.matched_queries.match_field_2: 10 }
  # Hit 1 is expected to be doc 1, matched by the unboosted query only.
  - length: { hits.hits.1.matched_queries: 1 }
  - match: { hits.hits.1.matched_queries.match_field_1: 1 }
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,16 @@ public void testPrimaryRelocationWhileIndexing() throws Exception {
ensureGreen("test");
AtomicInteger numAutoGenDocs = new AtomicInteger();
final AtomicBoolean finished = new AtomicBoolean(false);
Thread indexingThread = new Thread() {
@Override
public void run() {
while (finished.get() == false && numAutoGenDocs.get() < 10_000) {
IndexResponse indexResponse = client().prepareIndex("test").setId("id").setSource("field", "value").get();
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
DeleteResponse deleteResponse = client().prepareDelete("test", "id").get();
assertEquals(DocWriteResponse.Result.DELETED, deleteResponse.getResult());
client().prepareIndex("test").setSource("auto", true).get();
numAutoGenDocs.incrementAndGet();
}
Thread indexingThread = new Thread(() -> {
while (finished.get() == false && numAutoGenDocs.get() < 10_000) {
IndexResponse indexResponse = client().prepareIndex("test").setId("id").setSource("field", "value").get();
assertEquals(DocWriteResponse.Result.CREATED, indexResponse.getResult());
DeleteResponse deleteResponse = client().prepareDelete("test", "id").get();
assertEquals(DocWriteResponse.Result.DELETED, deleteResponse.getResult());
client().prepareIndex("test").setSource("auto", true).get();
numAutoGenDocs.incrementAndGet();
}
};
});
indexingThread.start();

ClusterState initialState = client().admin().cluster().prepareState().get().getState();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasItemInArray;
import static org.hamcrest.Matchers.hasKey;

public class MatchedQueriesIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase {

Expand Down Expand Up @@ -95,15 +97,18 @@ public void testSimpleMatchedQueryFromFilteredQuery() throws Exception {
.should(rangeQuery("number").gte(2).queryName("test2"))
)
)
.setIncludeNamedQueriesScore(true)
.get();
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("3") || hit.getId().equals("2")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("test2"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("test2"));
assertThat(hit.getMatchedQueryScore("test2"), equalTo(1f));
} else if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("test1"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("test1"));
assertThat(hit.getMatchedQueryScore("test1"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -113,15 +118,18 @@ public void testSimpleMatchedQueryFromFilteredQuery() throws Exception {
.setQuery(
boolQuery().should(rangeQuery("number").lte(2).queryName("test1")).should(rangeQuery("number").gt(2).queryName("test2"))
)
.setIncludeNamedQueriesScore(true)
.get();
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1") || hit.getId().equals("2")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("test1"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("test1"));
assertThat(hit.getMatchedQueryScore("test1"), equalTo(1f));
} else if (hit.getId().equals("3")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("test2"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("test2"));
assertThat(hit.getMatchedQueryScore("test2"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -147,12 +155,15 @@ public void testSimpleMatchedQueryFromTopLevelFilter() throws Exception {
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(2));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueries(), hasItemInArray("title"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("title"));
assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f));
} else if (hit.getId().equals("2") || hit.getId().equals("3")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -168,12 +179,15 @@ public void testSimpleMatchedQueryFromTopLevelFilter() throws Exception {
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(2));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueries(), hasItemInArray("title"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("title"));
assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f));
} else if (hit.getId().equals("2") || hit.getId().equals("3")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -197,9 +211,11 @@ public void testSimpleMatchedQueryFromTopLevelFilterAndFilteredQuery() throws Ex
assertHitCount(searchResponse, 3L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1") || hit.getId().equals("2") || hit.getId().equals("3")) {
assertThat(hit.getMatchedQueries().length, equalTo(2));
assertThat(hit.getMatchedQueries(), hasItemInArray("name"));
assertThat(hit.getMatchedQueries(), hasItemInArray("title"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("name"));
assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("title"));
assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand Down Expand Up @@ -231,13 +247,15 @@ public void testRegExpQuerySupportsName() throws InterruptedException {

SearchResponse searchResponse = client().prepareSearch()
.setQuery(QueryBuilders.regexpQuery("title", "title1").queryName("regex"))
.setIncludeNamedQueriesScore(true)
.get();
assertHitCount(searchResponse, 1L);

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("regex"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("regex"));
assertThat(hit.getMatchedQueryScore("regex"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -252,15 +270,17 @@ public void testPrefixQuerySupportsName() throws InterruptedException {
refresh();
indexRandomForConcurrentSearch("test1");

SearchResponse searchResponse = client().prepareSearch()
var query = client().prepareSearch()
.setQuery(QueryBuilders.prefixQuery("title", "title").queryName("prefix"))
.get();
.setIncludeNamedQueriesScore(true);
var searchResponse = query.get();
assertHitCount(searchResponse, 1L);

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("prefix"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("prefix"));
assertThat(hit.getMatchedQueryScore("prefix"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -282,8 +302,9 @@ public void testFuzzyQuerySupportsName() throws InterruptedException {

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("fuzzy"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("fuzzy"));
assertThat(hit.getMatchedQueryScore("fuzzy"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -300,13 +321,15 @@ public void testWildcardQuerySupportsName() throws InterruptedException {

SearchResponse searchResponse = client().prepareSearch()
.setQuery(QueryBuilders.wildcardQuery("title", "titl*").queryName("wildcard"))
.setIncludeNamedQueriesScore(true)
.get();
assertHitCount(searchResponse, 1L);

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("wildcard"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("wildcard"));
assertThat(hit.getMatchedQueryScore("wildcard"), equalTo(1f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -328,8 +351,9 @@ public void testSpanFirstQuerySupportsName() throws InterruptedException {

for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("span"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("span"));
assertThat(hit.getMatchedQueryScore("span"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand Down Expand Up @@ -363,11 +387,13 @@ public void testMatchedWithShould() throws Exception {
assertHitCount(searchResponse, 2L);
for (SearchHit hit : searchResponse.getHits()) {
if (hit.getId().equals("1")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("dolor"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("dolor"));
assertThat(hit.getMatchedQueryScore("dolor"), greaterThan(0f));
} else if (hit.getId().equals("2")) {
assertThat(hit.getMatchedQueries().length, equalTo(1));
assertThat(hit.getMatchedQueries(), hasItemInArray("elit"));
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("elit"));
assertThat(hit.getMatchedQueryScore("elit"), greaterThan(0f));
} else {
fail("Unexpected document returned with id " + hit.getId());
}
Expand All @@ -391,7 +417,10 @@ public void testMatchedWithWrapperQuery() throws Exception {
for (QueryBuilder query : queries) {
SearchResponse searchResponse = client().prepareSearch().setQuery(query).get();
assertHitCount(searchResponse, 1L);
assertThat(searchResponse.getHits().getAt(0).getMatchedQueries()[0], equalTo("abc"));
SearchHit hit = searchResponse.getHits().getAt(0);
assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1));
assertThat(hit.getMatchedQueriesAndScores(), hasKey("abc"));
assertThat(hit.getMatchedQueryScore("abc"), greaterThan(0f));
}
}
}
Loading

0 comments on commit ccbd26b

Please sign in to comment.