Skip to content

Commit

Permalink
ported SAI-4604 : Add proper metrics around distributed request delay…
Browse files Browse the repository at this point in the history
… / http2 client bandwidth (#158)
  • Loading branch information
noblepaul authored Oct 11, 2023
1 parent b0bfe4a commit 7143538
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import static org.apache.solr.util.stats.InstrumentedHttpListenerFactory.KNOWN_METRIC_NAME_STRATEGIES;

import com.codahale.metrics.Histogram;
import java.lang.invoke.MethodHandles;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -95,6 +96,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory

private String scheme = null;

private Histogram delayedRequests;

private InstrumentedHttpListenerFactory.NameStrategy metricNameStrategy;

protected final Random r = new Random();
Expand Down Expand Up @@ -194,6 +197,9 @@ private void initReplicaListTransformers(NamedList<?> routingConfig) {
new RequestReplicaListTransformerGenerator(defaultRltFactory, stableRltFactory);
}

private static final long DELAY_WARN_THRESHOLD =
TimeUnit.NANOSECONDS.convert(200, TimeUnit.MILLISECONDS);

@Override
public void init(PluginInfo info) {
StringBuilder sb = new StringBuilder();
Expand Down Expand Up @@ -287,8 +293,19 @@ public void init(PluginInfo info) {
.withMaxConnectionsPerHost(maxConnectionsPerHost)
.build();
this.defaultClient.addListenerFactory(this.httpListenerFactory);
this.loadbalancer = new LBHttp2SolrClient.Builder(defaultClient).build();

this.loadbalancer =
new LBHttp2SolrClient.Builder(defaultClient)
.setDelayedRequestListener(
it -> {
if (it > DELAY_WARN_THRESHOLD) {
long millis = TimeUnit.MILLISECONDS.convert(it, TimeUnit.NANOSECONDS);
log.info("Remote shard request delayed by {} milliseconds", millis);
if (delayedRequests != null) {
delayedRequests.update(millis);
}
}
})
.build();
initReplicaListTransformers(getParameter(args, "replicaRouting", null, sb));

log.debug("created with {}", sb);
Expand Down Expand Up @@ -410,6 +427,7 @@ private String buildUrl(String url) {
public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
solrMetricsContext = parentContext.getChildContext(this);
String expandedScope = SolrMetricManager.mkName(scope, SolrInfoBean.Category.QUERY.name());
delayedRequests = solrMetricsContext.histogram("delayedInterNodeRequests", expandedScope);
httpListenerFactory.initializeMetrics(solrMetricsContext, expandedScope);
commExecutor =
MetricUtils.instrumentedExecutorService(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.LongConsumer;
import org.apache.solr.client.solrj.ResponseParser;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
Expand Down Expand Up @@ -90,18 +91,24 @@ public class LBHttp2SolrClient extends LBSolrClient {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final Http2SolrClient solrClient;

private final LongConsumer delayedRequestListener;

/**
* @deprecated Use {@link LBHttp2SolrClient.Builder} instead
*/
@Deprecated
public LBHttp2SolrClient(Http2SolrClient solrClient, String... baseSolrUrls) {
super(Arrays.asList(baseSolrUrls));
this.solrClient = solrClient;
this.delayedRequestListener = DELAYED_REQ_LOGGER;
}

private LBHttp2SolrClient(Http2SolrClient solrClient, List<String> baseSolrUrls) {
private LBHttp2SolrClient(
Http2SolrClient solrClient, List<String> baseSolrUrls, LongConsumer delayedRequestListener) {
super(baseSolrUrls);
this.solrClient = solrClient;
this.delayedRequestListener =
delayedRequestListener == null ? DELAYED_REQ_LOGGER : delayedRequestListener;
}

@Override
Expand Down Expand Up @@ -250,6 +257,14 @@ private interface RetryListener {

private static final long DELAY_WARN_THRESHOLD =
TimeUnit.NANOSECONDS.convert(2000, TimeUnit.MILLISECONDS);
static LongConsumer DELAYED_REQ_LOGGER =
it -> {
if (it > DELAY_WARN_THRESHOLD) {
log.info(
"Remote shard request delayed by {} milliseconds",
TimeUnit.MILLISECONDS.convert(it, TimeUnit.NANOSECONDS));
}
};

private Cancellable doRequest(
String baseUrl,
Expand All @@ -273,13 +288,7 @@ public void onStart() {
// the request. Here we add extra logging to notify us if this assumption is
// violated. See: SOLR-16099, SOLR-16129,
// https://github.com/fullstorydev/lucene-solr/commit/445508adb4a
long delayed = System.nanoTime() - requestSubmitTimeNanos;
if (delayed > DELAY_WARN_THRESHOLD) {
log.info(
"Remote shard request to {} delayed by {} milliseconds",
req.servers,
TimeUnit.MILLISECONDS.convert(delayed, TimeUnit.NANOSECONDS));
}
delayedRequestListener.accept(System.nanoTime() - requestSubmitTimeNanos);
AsyncListener.super.onStart();
}

Expand Down Expand Up @@ -342,6 +351,8 @@ public static class Builder {

private final Http2SolrClient http2SolrClient;
private final String[] baseSolrUrls;

private LongConsumer delayedRequestListener;
private long aliveCheckIntervalMillis =
TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS); // 1 minute between checks

Expand All @@ -350,6 +361,11 @@ public Builder(Http2SolrClient http2Client, String... baseSolrUrls) {
this.baseSolrUrls = baseSolrUrls;
}

public Builder setDelayedRequestListener(LongConsumer listener) {
this.delayedRequestListener = listener;
return this;
}

/**
* LBHttpSolrServer keeps pinging the dead servers at fixed interval to find if it is alive. Use
* this to set that interval
Expand All @@ -367,7 +383,8 @@ public LBHttp2SolrClient.Builder setAliveCheckInterval(int aliveCheckInterval, T

public LBHttp2SolrClient build() {
LBHttp2SolrClient solrClient =
new LBHttp2SolrClient(this.http2SolrClient, Arrays.asList(this.baseSolrUrls));
new LBHttp2SolrClient(
this.http2SolrClient, Arrays.asList(this.baseSolrUrls), delayedRequestListener);
solrClient.aliveCheckIntervalMillis = this.aliveCheckIntervalMillis;
return solrClient;
}
Expand Down

0 comments on commit 7143538

Please sign in to comment.