diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java index 5b227359701..7e1032f6d2c 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java @@ -18,6 +18,7 @@ import static org.apache.solr.util.stats.InstrumentedHttpListenerFactory.KNOWN_METRIC_NAME_STRATEGIES; +import com.codahale.metrics.Histogram; import java.lang.invoke.MethodHandles; import java.util.Iterator; import java.util.List; @@ -95,6 +96,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory private String scheme = null; + private Histogram delayedRequests; + private InstrumentedHttpListenerFactory.NameStrategy metricNameStrategy; protected final Random r = new Random(); @@ -194,6 +197,9 @@ private void initReplicaListTransformers(NamedList<?> routingConfig) { new RequestReplicaListTransformerGenerator(defaultRltFactory, stableRltFactory); } + private static final long DELAY_WARN_THRESHOLD = + TimeUnit.NANOSECONDS.convert(200, TimeUnit.MILLISECONDS); + @Override public void init(PluginInfo info) { StringBuilder sb = new StringBuilder(); @@ -287,8 +293,19 @@ public void init(PluginInfo info) { .withMaxConnectionsPerHost(maxConnectionsPerHost) .build(); this.defaultClient.addListenerFactory(this.httpListenerFactory); - this.loadbalancer = new LBHttp2SolrClient.Builder(defaultClient).build(); - + this.loadbalancer = + new LBHttp2SolrClient.Builder(defaultClient) + .setDelayedRequestListener( + it -> { + if (it > DELAY_WARN_THRESHOLD) { + long millis = TimeUnit.MILLISECONDS.convert(it, TimeUnit.NANOSECONDS); + log.info("Remote shard request delayed by {} milliseconds", millis); + if (delayedRequests != null) { + delayedRequests.update(millis); + } + } + }) + .build(); initReplicaListTransformers(getParameter(args, "replicaRouting", null, sb)); log.debug("created with {}", sb); @@ -410,6 +427,7 @@ private String buildUrl(String url) { public void initializeMetrics(SolrMetricsContext parentContext, String scope) { solrMetricsContext = parentContext.getChildContext(this); String expandedScope = SolrMetricManager.mkName(scope, SolrInfoBean.Category.QUERY.name()); + delayedRequests = solrMetricsContext.histogram("delayedInterNodeRequests", expandedScope); httpListenerFactory.initializeMetrics(solrMetricsContext, expandedScope); commExecutor = MetricUtils.instrumentedExecutorService( diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java index fbf8b659a55..b02bf1cc96c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java @@ -30,6 +30,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.LongConsumer; import org.apache.solr.client.solrj.ResponseParser; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; @@ -90,6 +91,8 @@ public class LBHttp2SolrClient extends LBSolrClient { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final Http2SolrClient solrClient; + private final LongConsumer delayedRequestListener; + /** * @deprecated Use {@link LBHttp2SolrClient.Builder} instead */ @@ -97,11 +100,15 @@ public class LBHttp2SolrClient extends LBSolrClient { public LBHttp2SolrClient(Http2SolrClient solrClient, String... baseSolrUrls) { super(Arrays.asList(baseSolrUrls)); this.solrClient = solrClient; + this.delayedRequestListener = DELAYED_REQ_LOGGER; } - private LBHttp2SolrClient(Http2SolrClient solrClient, List<String> baseSolrUrls) { + private LBHttp2SolrClient( + Http2SolrClient solrClient, List<String> baseSolrUrls, LongConsumer delayedRequestListener) { super(baseSolrUrls); this.solrClient = solrClient; + this.delayedRequestListener = + delayedRequestListener == null ? DELAYED_REQ_LOGGER : delayedRequestListener; } @Override @@ -250,6 +257,14 @@ private interface RetryListener { private static final long DELAY_WARN_THRESHOLD = TimeUnit.NANOSECONDS.convert(2000, TimeUnit.MILLISECONDS); + static LongConsumer DELAYED_REQ_LOGGER = + it -> { + if (it > DELAY_WARN_THRESHOLD) { + log.info( + "Remote shard request delayed by {} milliseconds", + TimeUnit.MILLISECONDS.convert(it, TimeUnit.NANOSECONDS)); + } + }; private Cancellable doRequest( String baseUrl, @@ -273,13 +288,7 @@ public void onStart() { // the request. Here we add extra logging to notify us if this assumption is // violated. See: SOLR-16099, SOLR-16129, // https://github.com/fullstorydev/lucene-solr/commit/445508adb4a - long delayed = System.nanoTime() - requestSubmitTimeNanos; - if (delayed > DELAY_WARN_THRESHOLD) { - log.info( - "Remote shard request to {} delayed by {} milliseconds", - req.servers, - TimeUnit.MILLISECONDS.convert(delayed, TimeUnit.NANOSECONDS)); - } + delayedRequestListener.accept(System.nanoTime() - requestSubmitTimeNanos); AsyncListener.super.onStart(); } @@ -342,6 +351,8 @@ public static class Builder { private final Http2SolrClient http2SolrClient; private final String[] baseSolrUrls; + + private LongConsumer delayedRequestListener; private long aliveCheckIntervalMillis = TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS); // 1 minute between checks @@ -350,6 +361,11 @@ public Builder(Http2SolrClient http2Client, String... baseSolrUrls) { this.baseSolrUrls = baseSolrUrls; } + public Builder setDelayedRequestListener(LongConsumer listener) { + this.delayedRequestListener = listener; + return this; + } + /** * LBHttpSolrServer keeps pinging the dead servers at fixed interval to find if it is alive. Use * this to set that interval @@ -367,7 +383,8 @@ public LBHttp2SolrClient.Builder setAliveCheckInterval(int aliveCheckInterval, T public LBHttp2SolrClient build() { LBHttp2SolrClient solrClient = - new LBHttp2SolrClient(this.http2SolrClient, Arrays.asList(this.baseSolrUrls)); + new LBHttp2SolrClient( + this.http2SolrClient, Arrays.asList(this.baseSolrUrls), delayedRequestListener); solrClient.aliveCheckIntervalMillis = this.aliveCheckIntervalMillis; return solrClient; }