Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add per-event JVM GC duration tracking #17472

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/operations/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,8 @@ For more information, see [Enabling Metrics](../configuration/index.md#enabling-
|`jvm/mem/committed`|Committed memory|`memKind`, `jvmVersion`|Close to max memory|
|`jvm/gc/count`|Garbage collection count|`gcName` (cms/g1/parallel/etc.), `gcGen` (old/young), `jvmVersion`|Varies|
|`jvm/gc/cpu`|Count of CPU time in Nanoseconds spent on garbage collection. Note: `jvm/gc/cpu` represents the total time over multiple GC cycles; divide by `jvm/gc/count` to get the mean GC time per cycle.|`gcName`, `gcGen`, `jvmVersion`|Sum of `jvm/gc/cpu` should be within 10-30% of sum of `jvm/cpu/total`, depending on the GC algorithm used (reported by [`JvmCpuMonitor`](../configuration/index.md#enabling-metrics)). |
|`jvm/gc/pause`| Stop-the-world garbage collection JVM-reported pause time (ms). Only emitted if `druid.monitoring.jvm.duration=true`.| `gcName` (cms/g1/parallel/etc.), `gcGen` (old/young), `jvmVersion` | Varies|
|`jvm/gc/concurrentTime`| JVM-reported time spent in concurrent phases of CMS pauses (ms). Only emitted if `druid.monitoring.jvm.duration=true`.| `gcName`(cms/g1/etc.), `gcGen` (old/young), `jvmVersion` | Varies|

### ZooKeeper

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.java.util.metrics;


import java.lang.reflect.Array;

// A fixed-size, thread-safe, append-only ring buffer for buffering events prior to emission.
// Events will overflow and wrap-around if buffer size is exceeded within a single emission window.
public class EventBuffer<Event>
{
private final Event[] buffer;
private final Class<Event> clazz;
private final int capacity;
private int back;
private int size;

public EventBuffer(Class<Event> clazz, int capacity)
{
this.clazz = clazz;
this.buffer = (Event[]) Array.newInstance(clazz, capacity);
this.capacity = capacity;
this.back = 0;
}

public synchronized int getCapacity()
{
return capacity;
}

public synchronized int getSize()
{
return size;
}

public synchronized void push(Event event)
{
buffer[back] = event;
back = (back + 1) % capacity;

if (size < capacity) {
++size;
}
}

public synchronized Event[] extract()
{
final Event[] finalEvents = (Event[]) Array.newInstance(clazz, size);
System.arraycopy(buffer, 0, finalEvents, 0, size);
size = back = 0;
return finalEvents;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,16 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.Pair;
import com.sun.management.GarbageCollectionNotificationInfo;
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
import org.apache.druid.java.util.emitter.service.ServiceMetricEvent;
import org.apache.druid.java.util.metrics.jvm.gc.GcEvent;

import javax.annotation.Nullable;
import javax.management.Notification;
import javax.management.NotificationEmitter;
import javax.management.NotificationListener;
import javax.management.openmbean.CompositeData;
import java.lang.management.BufferPoolMXBean;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
Expand All @@ -42,30 +47,37 @@ public class JvmMonitor extends FeedDefiningMonitor

private static final String JVM_VERSION = "jvmVersion";
private static final String JAVA_VERSION = System.getProperty("java.version");
private final JvmMonitorConfig config;

@VisibleForTesting
@Nullable
final GcCollectors gcCollectors;
private final Map<String, String[]> dimensions;
@Nullable
private final AllocationMetricCollector collector;
@Nullable
final GcGranularEventCollector gcEventCollector;

public JvmMonitor()
public JvmMonitor(JvmMonitorConfig config)
{
this(ImmutableMap.of());
this(ImmutableMap.of(), config);
}

public JvmMonitor(Map<String, String[]> dimensions)
public JvmMonitor(Map<String, String[]> dimensions, JvmMonitorConfig config)
{
this(dimensions, DEFAULT_METRICS_FEED);
this(dimensions, DEFAULT_METRICS_FEED, config);
}

public JvmMonitor(Map<String, String[]> dimensions, String feed)
public JvmMonitor(Map<String, String[]> dimensions, String feed, JvmMonitorConfig config)
{
super(feed);
Preconditions.checkNotNull(dimensions);
this.config = config;
this.dimensions = ImmutableMap.copyOf(dimensions);
this.collector = AllocationMetricCollectors.getAllocationMetricCollector();
this.gcCollectors = new GcCollectors();

this.gcEventCollector = config.recordDuration() ? new GcGranularEventCollector() : null;
}

@Override
Expand Down Expand Up @@ -150,6 +162,9 @@ private void emitDirectMemMetrics(ServiceEmitter emitter)
private void emitGcMetrics(ServiceEmitter emitter)
{
gcCollectors.emit(emitter, dimensions);
if (gcEventCollector != null) {
gcEventCollector.emit(emitter, dimensions);
}
}

private class GcCollectors
Expand Down Expand Up @@ -188,79 +203,25 @@ void emit(ServiceEmitter emitter, Map<String, String[]> dimensions)

private class GcGenerationCollector
{
private static final String GC_YOUNG_GENERATION_NAME = "young";
private static final String GC_OLD_GENERATION_NAME = "old";
private static final String GC_ZGC_GENERATION_NAME = "zgc";
private static final String CMS_COLLECTOR_NAME = "cms";
private static final String G1_COLLECTOR_NAME = "g1";
private static final String PARALLEL_COLLECTOR_NAME = "parallel";
private static final String SERIAL_COLLECTOR_NAME = "serial";
private static final String ZGC_COLLECTOR_NAME = "zgc";
private static final String SHENANDOAN_COLLECTOR_NAME = "shenandoah";
private final String generation;
private final String collectorName;
private final GcEvent event;
private final GarbageCollectorMXBean gcBean;
private long lastInvocations = 0;
private long lastCpuMillis = 0;

GcGenerationCollector(GarbageCollectorMXBean gcBean)
{
Pair<String, String> gcNamePair = getReadableName(gcBean.getName());
this.generation = gcNamePair.lhs;
this.collectorName = gcNamePair.rhs;
this.event = new GcEvent(gcBean.getName());
this.gcBean = gcBean;
}

private Pair<String, String> getReadableName(String name)
{
switch (name) {
//CMS
case "ParNew":
return new Pair<>(GC_YOUNG_GENERATION_NAME, CMS_COLLECTOR_NAME);
case "ConcurrentMarkSweep":
return new Pair<>(GC_OLD_GENERATION_NAME, CMS_COLLECTOR_NAME);

// G1
case "G1 Young Generation":
return new Pair<>(GC_YOUNG_GENERATION_NAME, G1_COLLECTOR_NAME);
case "G1 Old Generation":
return new Pair<>(GC_OLD_GENERATION_NAME, G1_COLLECTOR_NAME);

// Parallel
case "PS Scavenge":
return new Pair<>(GC_YOUNG_GENERATION_NAME, PARALLEL_COLLECTOR_NAME);
case "PS MarkSweep":
return new Pair<>(GC_OLD_GENERATION_NAME, PARALLEL_COLLECTOR_NAME);

// Serial
case "Copy":
return new Pair<>(GC_YOUNG_GENERATION_NAME, SERIAL_COLLECTOR_NAME);
case "MarkSweepCompact":
return new Pair<>(GC_OLD_GENERATION_NAME, SERIAL_COLLECTOR_NAME);

//zgc
case "ZGC":
return new Pair<>(GC_ZGC_GENERATION_NAME, ZGC_COLLECTOR_NAME);

//Shenandoah
case "Shenandoah Cycles":
return new Pair<>(GC_YOUNG_GENERATION_NAME, SHENANDOAN_COLLECTOR_NAME);
case "Shenandoah Pauses":
return new Pair<>(GC_OLD_GENERATION_NAME, SHENANDOAN_COLLECTOR_NAME);

default:
return new Pair<>(name, name);
}
}

void emit(ServiceEmitter emitter, Map<String, String[]> dimensions)
{
ImmutableMap.Builder<String, String[]> dimensionsCopyBuilder = ImmutableMap
.<String, String[]>builder()
.putAll(dimensions)
.put("gcGen", new String[]{generation});
.put("gcGen", new String[]{event.druidGenerationName});

dimensionsCopyBuilder.put("gcName", new String[]{collectorName});
dimensionsCopyBuilder.put("gcName", new String[]{event.druidCollectorName});

Map<String, String[]> dimensionsCopy = dimensionsCopyBuilder.build();

Expand Down Expand Up @@ -323,4 +284,63 @@ void emit(ServiceEmitter emitter, Map<String, String[]> dimensions)
emitter.emit(builder.setMetric("jvm/gc/mem/init", memoryUsage.getInit()));
}
}

private class GcGranularEventCollector
{
// From: https://github.com/Netflix/spectator/blob/main/spectator-ext-gc/src/main/java/com/netflix/spectator/gc/GcLogger.java#L56
private static final int BUFFER_SIZE = 256;
private final EventBuffer<ServiceMetricEvent.Builder> buffer;
private final GcNotificationListener listener;

public GcGranularEventCollector()
{
this.buffer = new EventBuffer<>(ServiceMetricEvent.Builder.class, BUFFER_SIZE);
this.listener = new GcNotificationListener();

for (GarbageCollectorMXBean mbean : ManagementFactory.getGarbageCollectorMXBeans()) {
if (mbean instanceof NotificationEmitter) {
final NotificationEmitter emitter = (NotificationEmitter) mbean;
emitter.addNotificationListener(this.listener, null, null);
}
}
}

void emit(ServiceEmitter emitter, Map<String, String[]> dimensions)
{
final ServiceMetricEvent.Builder[] events = buffer.extract();
for (ServiceMetricEvent.Builder builder : events) {
MonitorUtils.addDimensionsToBuilder(builder, dimensions);
emitter.emit(builder);
}
}

private void processGcEvent(GarbageCollectionNotificationInfo info)
{
final ServiceMetricEvent.Builder builder = builder();

final GcEvent event = new GcEvent(info.getGcName(), info.getGcCause());
builder.setDimension("gcName", new String[]{event.druidCollectorName});
builder.setDimension("gcGen", new String[]{event.druidGenerationName});
builder.setDimension(JVM_VERSION, JAVA_VERSION);

// record pause time or concurrent time
final String durationMetricName = event.isConcurrent() ? "jvm/gc/concurrentTime" : "jvm/gc/pause";
builder.setMetric(durationMetricName, info.getGcInfo().getDuration());
buffer.push(builder);
}

private class GcNotificationListener implements NotificationListener
{
@Override
public void handleNotification(Notification notification, Object ref)
{
final String type = notification.getType();
if (GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION.equals(type)) {
CompositeData cd = (CompositeData) notification.getUserData();
GarbageCollectionNotificationInfo info = GarbageCollectionNotificationInfo.from(cd);
processGcEvent(info);
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package org.apache.druid.java.util.metrics;

import com.fasterxml.jackson.annotation.JsonProperty;

public class JvmMonitorConfig
{
// The JVMMonitor is really more like a JVM memory + GC monitor
public static final String PREFIX = "druid.monitoring.jvm";

@JsonProperty("duration")
private boolean duration = false;

public boolean recordDuration() {
return duration;
}

public JvmMonitorConfig(@JsonProperty("duration") final boolean duration)
{
this.duration = duration;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,36 +25,38 @@
public class Monitors
{
/**
* Creates a JVM monitor, configured with the given dimensions, that gathers all currently available JVM-wide
* Creates a JVM monitor, configured with the given dimensions and config that gathers all currently available JVM-wide
* monitors. Emitted events have default feed {@link FeedDefiningMonitor#DEFAULT_METRICS_FEED}
* See: {@link Monitors#createCompoundJvmMonitor(Map, String)}
* See: {@link Monitors#createCompoundJvmMonitor(Map, String, JvmMonitorConfig)}
*
* @param dimensions common dimensions to configure the JVM monitor with
* @param config JVM-wide monitor config
*
* @return a universally useful JVM-wide monitor
*/
public static Monitor createCompoundJvmMonitor(Map<String, String[]> dimensions)
public static Monitor createCompoundJvmMonitor(Map<String, String[]> dimensions, JvmMonitorConfig config)
{
return createCompoundJvmMonitor(dimensions, FeedDefiningMonitor.DEFAULT_METRICS_FEED);
return createCompoundJvmMonitor(dimensions, FeedDefiningMonitor.DEFAULT_METRICS_FEED, config);
}

/**
* Creates a JVM monitor, configured with the given dimensions, that gathers all currently available JVM-wide
* Creates a JVM monitor, configured with the given dimensions and config that gathers all currently available JVM-wide
* monitors: {@link JvmMonitor}, {@link JvmCpuMonitor} and {@link JvmThreadsMonitor} (this list may
* change in any future release of this library, including a minor release).
*
* @param dimensions common dimensions to configure the JVM monitor with
* @param feed feed for all emitted events
* @param config JVM-wide monitor config
*
* @return a universally useful JVM-wide monitor
*/
public static Monitor createCompoundJvmMonitor(Map<String, String[]> dimensions, String feed)
public static Monitor createCompoundJvmMonitor(Map<String, String[]> dimensions, String feed, JvmMonitorConfig config)
{
// This list doesn't include SysMonitor because it should probably be run only in one JVM, if several JVMs are
// running on the same instance, so most of the time SysMonitor should be configured/set up differently than
// "simple" JVM monitors, created below.
return and(// Could equally be or(), because all member monitors always return true from their monitor() methods.
new JvmMonitor(dimensions, feed),
new JvmMonitor(dimensions, feed, config),
new JvmCpuMonitor(dimensions, feed),
new JvmThreadsMonitor(dimensions, feed)
);
Expand Down
Loading
Loading