Skip to content

Commit

Permalink
Add telemetry for retrievers (elastic#114109)
Browse files Browse the repository at this point in the history
  • Loading branch information
pmpailis authored Oct 10, 2024
1 parent fd40f8b commit 4eab631
Show file tree
Hide file tree
Showing 19 changed files with 677 additions and 47 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/114109.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 114109
summary: Update cluster stats for retrievers
area: Search
type: enhancement
issues: []
4 changes: 4 additions & 0 deletions docs/reference/cluster/stats.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,10 @@ Queries are counted once per search request, meaning that if the same query type
(object) Search sections used in selected nodes.
For each section, name and number of times it's been used is reported.

`retrievers`::
(object) Retriever types that were used in selected nodes.
For each retriever, name and number of times it's been used is reported.

=====
`dense_vector`::
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.search.retriever;

import org.elasticsearch.action.admin.cluster.node.capabilities.NodesCapabilitiesRequest;
import org.elasticsearch.action.admin.cluster.node.capabilities.NodesCapabilitiesResponse;
import org.elasticsearch.action.admin.cluster.stats.SearchUsageStats;
import org.elasticsearch.client.Request;
import org.elasticsearch.common.Strings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.vectors.KnnSearchBuilder;
import org.elasticsearch.search.vectors.KnnVectorQueryBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.junit.Before;

import java.io.IOException;
import java.util.List;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.equalTo;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
public class RetrieverTelemetryIT extends ESIntegTestCase {

private static final String INDEX_NAME = "test_index";

@Override
protected boolean addMockHttpTransport() {
return false; // enable http
}

@Before
public void setup() throws IOException {
XContentBuilder builder = XContentFactory.jsonBuilder()
.startObject()
.startObject("properties")
.startObject("vector")
.field("type", "dense_vector")
.field("dims", 1)
.field("index", true)
.field("similarity", "l2_norm")
.startObject("index_options")
.field("type", "hnsw")
.endObject()
.endObject()
.startObject("text")
.field("type", "text")
.endObject()
.startObject("integer")
.field("type", "integer")
.endObject()
.startObject("topic")
.field("type", "keyword")
.endObject()
.endObject()
.endObject();

assertAcked(prepareCreate(INDEX_NAME).setMapping(builder));
ensureGreen(INDEX_NAME);
}

private void performSearch(SearchSourceBuilder source) throws IOException {
Request request = new Request("GET", INDEX_NAME + "/_search");
request.setJsonEntity(Strings.toString(source));
getRestClient().performRequest(request);
}

public void testTelemetryForRetrievers() throws IOException {

if (false == isRetrieverTelemetryEnabled()) {
return;
}

// search#1 - this will record 1 entry for "retriever" in `sections`, and 1 for "knn" under `retrievers`
{
performSearch(new SearchSourceBuilder().retriever(new KnnRetrieverBuilder("vector", new float[] { 1.0f }, null, 10, 15, null)));
}

// search#2 - this will record 1 entry for "retriever" in `sections`, 1 for "standard" under `retrievers`, and 1 for "range" under
// `queries`
{
performSearch(new SearchSourceBuilder().retriever(new StandardRetrieverBuilder(QueryBuilders.rangeQuery("integer").gte(2))));
}

// search#3 - this will record 1 entry for "retriever" in `sections`, and 1 for "standard" under `retrievers`, and 1 for "knn" under
// `queries`
{
performSearch(
new SearchSourceBuilder().retriever(
new StandardRetrieverBuilder(new KnnVectorQueryBuilder("vector", new float[] { 1.0f }, 10, 15, null))
)
);
}

// search#4 - this will record 1 entry for "retriever" in `sections`, and 1 for "standard" under `retrievers`, and 1 for "term"
// under `queries`
{
performSearch(new SearchSourceBuilder().retriever(new StandardRetrieverBuilder(QueryBuilders.termQuery("topic", "foo"))));
}

// search#5 - t
// his will record 1 entry for "knn" in `sections`
{
performSearch(new SearchSourceBuilder().knnSearch(List.of(new KnnSearchBuilder("vector", new float[] { 1.0f }, 10, 15, null))));
}

// search#6 - this will record 1 entry for "query" in `sections`, and 1 for "match_all" under `queries`
{
performSearch(new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()));
}

// cluster stats
{
SearchUsageStats stats = clusterAdmin().prepareClusterStats().get().getIndicesStats().getSearchUsageStats();
assertEquals(6, stats.getTotalSearchCount());

assertThat(stats.getSectionsUsage().size(), equalTo(3));
assertThat(stats.getSectionsUsage().get("retriever"), equalTo(4L));
assertThat(stats.getSectionsUsage().get("query"), equalTo(1L));
assertThat(stats.getSectionsUsage().get("knn"), equalTo(1L));

assertThat(stats.getRetrieversUsage().size(), equalTo(2));
assertThat(stats.getRetrieversUsage().get("standard"), equalTo(3L));
assertThat(stats.getRetrieversUsage().get("knn"), equalTo(1L));

assertThat(stats.getQueryUsage().size(), equalTo(4));
assertThat(stats.getQueryUsage().get("range"), equalTo(1L));
assertThat(stats.getQueryUsage().get("term"), equalTo(1L));
assertThat(stats.getQueryUsage().get("match_all"), equalTo(1L));
assertThat(stats.getQueryUsage().get("knn"), equalTo(1L));
}
}

private boolean isRetrieverTelemetryEnabled() throws IOException {
NodesCapabilitiesResponse res = clusterAdmin().nodesCapabilities(
new NodesCapabilitiesRequest().method(RestRequest.Method.GET).path("_cluster/stats").capabilities("retrievers-usage-stats")
).actionGet();
return res != null && res.isSupported().orElse(false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ static TransportVersion def(int id) {
public static final TransportVersion FAST_REFRESH_RCO = def(8_762_00_0);
public static final TransportVersion TEXT_SIMILARITY_RERANKER_QUERY_REWRITE = def(8_763_00_0);
public static final TransportVersion SIMULATE_INDEX_TEMPLATES_SUBSTITUTIONS = def(8_764_00_0);
public static final TransportVersion RETRIEVERS_TELEMETRY_ADDED = def(8_765_00_0);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.Map;
import java.util.Objects;

import static org.elasticsearch.TransportVersions.RETRIEVERS_TELEMETRY_ADDED;
import static org.elasticsearch.TransportVersions.V_8_12_0;

/**
Expand All @@ -34,6 +35,7 @@ public final class SearchUsageStats implements Writeable, ToXContentFragment {
private final Map<String, Long> queries;
private final Map<String, Long> rescorers;
private final Map<String, Long> sections;
private final Map<String, Long> retrievers;

/**
* Creates a new empty stats instance, that will get additional stats added through {@link #add(SearchUsageStats)}
Expand All @@ -43,24 +45,33 @@ public SearchUsageStats() {
this.queries = new HashMap<>();
this.sections = new HashMap<>();
this.rescorers = new HashMap<>();
this.retrievers = new HashMap<>();
}

/**
* Creates a new stats instance with the provided info. The expectation is that when a new instance is created using
* this constructor, the provided stats are final and won't be modified further.
*/
public SearchUsageStats(Map<String, Long> queries, Map<String, Long> rescorers, Map<String, Long> sections, long totalSearchCount) {
public SearchUsageStats(
Map<String, Long> queries,
Map<String, Long> rescorers,
Map<String, Long> sections,
Map<String, Long> retrievers,
long totalSearchCount
) {
this.totalSearchCount = totalSearchCount;
this.queries = queries;
this.sections = sections;
this.rescorers = rescorers;
this.retrievers = retrievers;
}

public SearchUsageStats(StreamInput in) throws IOException {
this.queries = in.readMap(StreamInput::readLong);
this.sections = in.readMap(StreamInput::readLong);
this.totalSearchCount = in.readVLong();
this.rescorers = in.getTransportVersion().onOrAfter(V_8_12_0) ? in.readMap(StreamInput::readLong) : Map.of();
this.retrievers = in.getTransportVersion().onOrAfter(RETRIEVERS_TELEMETRY_ADDED) ? in.readMap(StreamInput::readLong) : Map.of();
}

@Override
Expand All @@ -72,6 +83,9 @@ public void writeTo(StreamOutput out) throws IOException {
if (out.getTransportVersion().onOrAfter(V_8_12_0)) {
out.writeMap(rescorers, StreamOutput::writeLong);
}
if (out.getTransportVersion().onOrAfter(RETRIEVERS_TELEMETRY_ADDED)) {
out.writeMap(retrievers, StreamOutput::writeLong);
}
}

/**
Expand All @@ -81,6 +95,7 @@ public void add(SearchUsageStats stats) {
stats.queries.forEach((query, count) -> queries.merge(query, count, Long::sum));
stats.rescorers.forEach((rescorer, count) -> rescorers.merge(rescorer, count, Long::sum));
stats.sections.forEach((query, count) -> sections.merge(query, count, Long::sum));
stats.retrievers.forEach((query, count) -> retrievers.merge(query, count, Long::sum));
this.totalSearchCount += stats.totalSearchCount;
}

Expand All @@ -95,6 +110,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.map(rescorers);
builder.field("sections");
builder.map(sections);
builder.field("retrievers");
builder.map(retrievers);
}
builder.endObject();
return builder;
Expand All @@ -112,6 +129,10 @@ public Map<String, Long> getSectionsUsage() {
return Collections.unmodifiableMap(sections);
}

public Map<String, Long> getRetrieversUsage() {
return Collections.unmodifiableMap(retrievers);
}

public long getTotalSearchCount() {
return totalSearchCount;
}
Expand All @@ -128,12 +149,13 @@ public boolean equals(Object o) {
return totalSearchCount == that.totalSearchCount
&& queries.equals(that.queries)
&& rescorers.equals(that.rescorers)
&& sections.equals(that.sections);
&& sections.equals(that.sections)
&& retrievers.equals(that.retrievers);
}

@Override
public int hashCode() {
return Objects.hash(totalSearchCount, queries, rescorers, sections);
return Objects.hash(totalSearchCount, queries, rescorers, sections, retrievers);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ public class RestClusterStatsAction extends BaseRestHandler {
private static final Set<String> SUPPORTED_CAPABILITIES = Set.of(
"human-readable-total-docs-size",
"verbose-dense-vector-mapping-stats",
"ccs-stats"
"ccs-stats",
"retrievers-usage-stats"
);
private static final Set<String> SUPPORTED_QUERY_PARAMETERS = Set.of("include_remotes", "nodeId", REST_TIMEOUT_PARAM);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1409,6 +1409,7 @@ private SearchSourceBuilder parseXContent(
parser,
new RetrieverParserContext(searchUsage, clusterSupportsFeature)
);
searchUsage.trackSectionUsage(RETRIEVER.getPreferredName());
} else if (QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
if (subSearchSourceBuilders.isEmpty() == false) {
throw new IllegalArgumentException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ protected static void declareBaseParserFields(
String name,
AbstractObjectParser<? extends RetrieverBuilder, RetrieverParserContext> parser
) {
parser.declareObjectArray((r, v) -> r.preFilterQueryBuilders = v, (p, c) -> {
QueryBuilder preFilterQueryBuilder = AbstractQueryBuilder.parseTopLevelQuery(p, c::trackQueryUsage);
c.trackSectionUsage(name + ":" + PRE_FILTER_FIELD.getPreferredName());
return preFilterQueryBuilder;
}, PRE_FILTER_FIELD);
parser.declareObjectArray(
(r, v) -> r.preFilterQueryBuilders = v,
(p, c) -> AbstractQueryBuilder.parseTopLevelQuery(p, c::trackQueryUsage),
PRE_FILTER_FIELD
);
parser.declareString(RetrieverBuilder::retrieverName, NAME_FIELD);
parser.declareFloat(RetrieverBuilder::minScore, MIN_SCORE_FIELD);
}
Expand Down Expand Up @@ -138,7 +138,7 @@ protected static RetrieverBuilder parseInnerRetrieverBuilder(XContentParser pars
throw new ParsingException(new XContentLocation(nonfe.getLineNumber(), nonfe.getColumnNumber()), message, nonfe);
}

context.trackSectionUsage(retrieverName);
context.trackRetrieverUsage(retrieverName);

if (parser.currentToken() != XContentParser.Token.END_OBJECT) {
throw new ParsingException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ public void trackRescorerUsage(String name) {
searchUsage.trackRescorerUsage(name);
}

public void trackRetrieverUsage(String name) {
searchUsage.trackRetrieverUsage(name);
}

public boolean clusterSupportsFeature(NodeFeature nodeFeature) {
return clusterSupportsFeature != null && clusterSupportsFeature.test(nodeFeature);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,36 +55,28 @@ public final class StandardRetrieverBuilder extends RetrieverBuilder implements
static {
PARSER.declareObject((r, v) -> r.queryBuilder = v, (p, c) -> {
QueryBuilder queryBuilder = AbstractQueryBuilder.parseTopLevelQuery(p, c::trackQueryUsage);
c.trackSectionUsage(NAME + ":" + QUERY_FIELD.getPreferredName());
return queryBuilder;
}, QUERY_FIELD);

PARSER.declareField((r, v) -> r.searchAfterBuilder = v, (p, c) -> {
SearchAfterBuilder searchAfterBuilder = SearchAfterBuilder.fromXContent(p);
c.trackSectionUsage(NAME + ":" + SEARCH_AFTER_FIELD.getPreferredName());
return searchAfterBuilder;
}, SEARCH_AFTER_FIELD, ObjectParser.ValueType.OBJECT_ARRAY);

PARSER.declareField((r, v) -> r.terminateAfter = v, (p, c) -> {
int terminateAfter = p.intValue();
c.trackSectionUsage(NAME + ":" + TERMINATE_AFTER_FIELD.getPreferredName());
return terminateAfter;
}, TERMINATE_AFTER_FIELD, ObjectParser.ValueType.INT);

PARSER.declareField((r, v) -> r.sortBuilders = v, (p, c) -> {
List<SortBuilder<?>> sortBuilders = SortBuilder.fromXContent(p);
c.trackSectionUsage(NAME + ":" + SORT_FIELD.getPreferredName());
return sortBuilders;
}, SORT_FIELD, ObjectParser.ValueType.OBJECT_ARRAY);

PARSER.declareField((r, v) -> r.collapseBuilder = v, (p, c) -> {
CollapseBuilder collapseBuilder = CollapseBuilder.fromXContent(p);
if (collapseBuilder.getField() != null) {
c.trackSectionUsage(COLLAPSE_FIELD.getPreferredName());
}
return collapseBuilder;
}, COLLAPSE_FIELD, ObjectParser.ValueType.OBJECT);

PARSER.declareField(
(r, v) -> r.searchAfterBuilder = v,
(p, c) -> SearchAfterBuilder.fromXContent(p),
SEARCH_AFTER_FIELD,
ObjectParser.ValueType.OBJECT_ARRAY
);
PARSER.declareField((r, v) -> r.terminateAfter = v, (p, c) -> p.intValue(), TERMINATE_AFTER_FIELD, ObjectParser.ValueType.INT);
PARSER.declareField(
(r, v) -> r.sortBuilders = v,
(p, c) -> SortBuilder.fromXContent(p),
SORT_FIELD,
ObjectParser.ValueType.OBJECT_ARRAY
);
PARSER.declareField(
(r, v) -> r.collapseBuilder = v,
(p, c) -> CollapseBuilder.fromXContent(p),
COLLAPSE_FIELD,
ObjectParser.ValueType.OBJECT
);
RetrieverBuilder.declareBaseParserFields(NAME, PARSER);
}

Expand Down
Loading

0 comments on commit 4eab631

Please sign in to comment.