From 4147e98532b49b33882ca5277a340dde699926cb Mon Sep 17 00:00:00 2001 From: Sylvain Juge <763082+SylvainJuge@users.noreply.github.com> Date: Thu, 1 Feb 2024 17:43:47 +0100 Subject: [PATCH] add simple retry strategy --- .../apm/agent/jmx/JmxConfiguration.java | 16 +++++ .../apm/agent/jmx/JmxMetricTracker.java | 72 +++++++++++++++---- 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java b/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java index e1026b4e8c..c9ebf2c705 100644 --- a/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java +++ b/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxConfiguration.java @@ -18,12 +18,16 @@ */ package co.elastic.apm.agent.jmx; +import co.elastic.apm.agent.tracer.configuration.TimeDuration; +import co.elastic.apm.agent.tracer.configuration.TimeDurationValueConverter; import org.stagemonitor.configuration.ConfigurationOption; import org.stagemonitor.configuration.ConfigurationOptionProvider; import java.util.Collections; import java.util.List; +import static co.elastic.apm.agent.tracer.configuration.RangeValidator.isNotInRange; + public class JmxConfiguration extends ConfigurationOptionProvider { private ConfigurationOption> captureJmxMetrics = ConfigurationOption.>builder(JmxMetric.TokenValueConverter.INSTANCE, List.class) @@ -137,4 +141,16 @@ public class JmxConfiguration extends ConfigurationOptionProvider { ConfigurationOption> getCaptureJmxMetrics() { return captureJmxMetrics; } + + private final ConfigurationOption delayFailedRegistrationRetry = TimeDurationValueConverter.durationOption("m") + .key("jmx_failed_retry_interval") + .tags("internal") + .description("If set to a value greater or equal to 1m, the agent will retry failed JMX metric registrations.") + .addValidator(isNotInRange(TimeDuration.of("1ms"), TimeDuration.of("59s"))) + .dynamic(true) + .buildWithDefault(TimeDuration.of("0m")); + + public ConfigurationOption getDelayFailedRegistrationRetry() { + return delayFailedRegistrationRetry; + } } diff --git a/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java b/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java index 6790682864..c7dda92fcc 100644 --- a/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java +++ b/apm-agent-plugins/apm-jmx-plugin/src/main/java/co/elastic/apm/agent/jmx/JmxMetricTracker.java @@ -23,10 +23,12 @@ import co.elastic.apm.agent.metrics.DoubleSupplier; import co.elastic.apm.agent.metrics.Labels; import co.elastic.apm.agent.metrics.MetricRegistry; -import co.elastic.apm.agent.tracer.GlobalLocks; +import co.elastic.apm.agent.sdk.internal.util.ExecutorUtils; +import co.elastic.apm.agent.sdk.internal.util.PrivilegedActionUtils; import co.elastic.apm.agent.sdk.logging.Logger; import co.elastic.apm.agent.sdk.logging.LoggerFactory; -import co.elastic.apm.agent.sdk.internal.util.PrivilegedActionUtils; +import co.elastic.apm.agent.tracer.GlobalLocks; +import co.elastic.apm.agent.tracer.configuration.TimeDuration; import org.stagemonitor.configuration.ConfigurationOption; import javax.annotation.Nullable; @@ -54,6 +56,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; public class JmxMetricTracker extends AbstractLifecycleListener { @@ -68,9 +71,17 @@ public class JmxMetricTracker extends AbstractLifecycleListener { @Nullable private volatile NotificationListener listener; + private final List failedMetrics; + + @Nullable + private ScheduledExecutorService retryExecutor; + public JmxMetricTracker(ElasticApmTracer tracer) { jmxConfiguration = tracer.getConfig(JmxConfiguration.class); metricRegistry = tracer.getMetricRegistry(); + + // using a synchronized list so adding to the list does not require synchronization + failedMetrics = Collections.synchronizedList(new ArrayList()); } @Override @@ -175,8 +186,15 @@ synchronized void init(final MBeanServer platformMBeanServer) { jmxConfiguration.getCaptureJmxMetrics().addChangeListener(new ConfigurationOption.ChangeListener>() { @Override public void onChange(ConfigurationOption configurationOption, List oldValue, List newValue) { - List oldRegistrations = compileJmxMetricRegistrations(oldValue, platformMBeanServer); - List newRegistrations = compileJmxMetricRegistrations(newValue, platformMBeanServer); + List registrationErrors = new ArrayList(); // those are not needed + List oldRegistrations = compileJmxMetricRegistrations(oldValue, platformMBeanServer, registrationErrors); + + List newRegistrations; + synchronized (failedMetrics) { + failedMetrics.clear(); + newRegistrations = compileJmxMetricRegistrations(newValue, platformMBeanServer, failedMetrics); + } + for (JmxMetricRegistration addedRegistration : removeAll(oldRegistrations, newRegistrations)) { addedRegistration.register(platformMBeanServer, metricRegistry); @@ -184,10 +202,27 @@ public void onChange(ConfigurationOption configurationOption, List for (JmxMetricRegistration deletedRegistration : removeAll(newRegistrations, oldRegistrations)) { deletedRegistration.unregister(metricRegistry); } - } }); - register(jmxConfiguration.getCaptureJmxMetrics().get(), platformMBeanServer); + + ConfigurationOption failedRetryConfig = jmxConfiguration.getDelayFailedRegistrationRetry(); + long retryMillis = failedRetryConfig.getValue().getMillis(); + if (!failedRetryConfig.isDefault()) { + retryExecutor = ExecutorUtils.createSingleThreadSchedulingDaemonPool("jmx-retry"); + retryExecutor.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + List failed = JmxMetricTracker.this.failedMetrics; + synchronized (failed) { + List toRetry = new ArrayList<>(failed); + failed.clear(); + register(toRetry, platformMBeanServer, failed); + } + } + }, retryMillis, retryMillis, TimeUnit.MILLISECONDS); + } + + register(jmxConfiguration.getCaptureJmxMetrics().get(), platformMBeanServer, failedMetrics); } private void registerMBeanNotificationListener(final MBeanServer server) { @@ -217,7 +252,7 @@ private void addMBean(ObjectName mBeanName, JmxMetric jmxMetric) { ObjectName metricName = jmxMetric.getObjectName(); if (metricName.apply(mBeanName) || matchesJbossStatisticsPool(mBeanName, metricName, server)) { logger.debug("MBean added at runtime: {}", jmxMetric.getObjectName()); - register(Collections.singletonList(jmxMetric), server); + register(Collections.singletonList(jmxMetric), server, failedMetrics); } } @@ -280,25 +315,33 @@ private static List removeAll(List removeFromThis, List toRemove) { return result; } - private void register(List jmxMetrics, MBeanServer server) { - for (JmxMetricRegistration registration : compileJmxMetricRegistrations(jmxMetrics, server)) { + private void register(List jmxMetrics, MBeanServer server, List failedMetrics) { + for (JmxMetricRegistration registration : compileJmxMetricRegistrations(jmxMetrics, server, failedMetrics)) { registration.register(server, metricRegistry); } } /** * A single {@link JmxMetric} can yield multiple {@link JmxMetricRegistration}s if the {@link JmxMetric} contains multiple attributes + * + * @param jmxMetrics JMX metrics to register + * @param server MBean server + * @param failedMetrics list of JMX metrics that failed to register (out) */ - private List compileJmxMetricRegistrations(List jmxMetrics, MBeanServer server) { - List registrations = new ArrayList<>(); + private List compileJmxMetricRegistrations(List jmxMetrics, MBeanServer server, List failedMetrics) { + List globalRegistrations = new ArrayList<>(); for (JmxMetric jmxMetric : jmxMetrics) { + List metricRegistrations = new ArrayList<>(); try { - addJmxMetricRegistration(jmxMetric, registrations, server); + addJmxMetricRegistration(jmxMetric, metricRegistrations, server); + globalRegistrations.addAll(metricRegistrations); } catch (Exception e) { + failedMetrics.add(jmxMetric); logger.error("Failed to register JMX metric {}", jmxMetric.toString(), e); } + } - return registrations; + return globalRegistrations; } private static void addJmxMetricRegistration(final JmxMetric jmxMetric, List registrations, MBeanServer server) throws JMException { @@ -471,5 +514,8 @@ public void stop() throws Exception { if (logManagerPropertyPoller != null) { logManagerPropertyPoller.interrupt(); } + if (retryExecutor != null) { + ExecutorUtils.shutdownAndWaitTermination(retryExecutor); + } } }