diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 44b9c2f8e5..c04a3f9a2c 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -31,6 +31,10 @@ Use subheadings with the "=====" level for adding notes for unreleased changes: === Unreleased +[float] +===== Bug fixes +* Fixed edge case where inferred spans could cause cycles in the trace parent-child relationships, subsequently resulting in the UI crashing - {pull}3588[#3588] + [[release-notes-1.x]] === Java Agent version 1.x diff --git a/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/TraceContext.java b/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/TraceContext.java index f8854468f0..cc3df527cb 100644 --- a/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/TraceContext.java +++ b/apm-agent-core/src/main/java/co/elastic/apm/agent/impl/transaction/TraceContext.java @@ -64,7 +64,7 @@ public class TraceContext implements Recyclable, co.elastic.apm.agent.tracer.Tra public static final String ELASTIC_TRACE_PARENT_TEXTUAL_HEADER_NAME = "elastic-apm-traceparent"; public static final String W3C_TRACE_PARENT_TEXTUAL_HEADER_NAME = "traceparent"; public static final String TRACESTATE_HEADER_NAME = "tracestate"; - public static final int SERIALIZED_LENGTH = 42; + public static final int SERIALIZED_LENGTH = 51; private static final int TEXT_HEADER_EXPECTED_LENGTH = 55; private static final int TEXT_HEADER_TRACE_ID_OFFSET = 3; private static final int TEXT_HEADER_PARENT_ID_OFFSET = 36; @@ -227,6 +227,7 @@ private TraceContext(ElasticApmTracer tracer, Id id) { *

* Note: the {@link #traceId} will still be 128 bit *

+ * * @param tracer a valid tracer */ public static TraceContext with64BitId(ElasticApmTracer tracer) { @@ -621,7 +622,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; TraceContext that = (TraceContext) o; return id.equals(that.id) && - traceId.equals(that.traceId); + traceId.equals(that.traceId); } public boolean idEquals(@Nullable TraceContext o) { @@ -650,6 +651,8 @@ public void serialize(byte[] buffer) { offset = traceId.toBytes(buffer, offset); offset = id.toBytes(buffer, offset); offset = transactionId.toBytes(buffer, offset); + buffer[offset++] = parentId.isEmpty() ? (byte) 0 : (byte) 1; + offset = parentId.toBytes(buffer, offset); buffer[offset++] = flags; buffer[offset++] = (byte) (discardable ? 1 : 0); ByteUtils.putLong(buffer, offset, clock.getOffset()); @@ -660,6 +663,12 @@ public void deserialize(byte[] buffer, @Nullable String serviceName, @Nullable S offset += traceId.fromBytes(buffer, offset); offset += id.fromBytes(buffer, offset); offset += transactionId.fromBytes(buffer, offset); + if (buffer[offset++] != 0) { + offset += parentId.fromBytes(buffer, offset); + } else { + parentId.resetState(); + offset += 8; + } flags = buffer[offset++]; discardable = buffer[offset++] == (byte) 1; clock.init(ByteUtils.getLong(buffer, offset)); @@ -672,6 +681,10 @@ public static long getSpanId(byte[] serializedTraceContext) { return ByteUtils.getLong(serializedTraceContext, 16); } + public static long getParentId(byte[] serializedTraceContext) { + return ByteUtils.getLong(serializedTraceContext, 33); + } + public boolean traceIdAndIdEquals(byte[] serialized) { return id.dataEquals(serialized, traceId.getLength()) && traceId.dataEquals(serialized, 0); } diff --git a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/CallTree.java b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/CallTree.java index 551a74b8be..3042fee406 100644 --- a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/CallTree.java +++ b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/CallTree.java @@ -79,9 +79,9 @@ public class CallTree implements Recyclable { * @see co.elastic.apm.agent.impl.transaction.AbstractSpan#childIds */ @Nullable - private LongList childIds; + private ChildList childIds; @Nullable - private LongList maybeChildIds; + private ChildList maybeChildIds; public CallTree() { } @@ -375,20 +375,21 @@ private void toString(Appendable out, int level) throws IOException { } } - int spanify(CallTree.Root root, TraceContext parentContext) { + int spanify(CallTree.Root root, TraceContext parentContext, TraceContext nonInferredParentContext) { int createdSpans = 0; if (activeContextOfDirectParent != null) { parentContext = activeContextOfDirectParent; + nonInferredParentContext = activeContextOfDirectParent; } Span span = null; if (!isPillar() || isLeaf()) { createdSpans++; - span = asSpan(root, parentContext); + span = asSpan(root, parentContext, nonInferredParentContext); this.isSpan = true; } List children = getChildren(); for (int i = 0, size = children.size(); i < size; i++) { - createdSpans += children.get(i).spanify(root, span != null ? span.getTraceContext() : parentContext); + createdSpans += children.get(i).spanify(root, span != null ? span.getTraceContext() : parentContext, nonInferredParentContext); } if (span != null) { span.end(span.getTimestamp() + getDurationUs()); @@ -396,7 +397,7 @@ int spanify(CallTree.Root root, TraceContext parentContext) { return createdSpans; } - protected Span asSpan(Root root, TraceContext parentContext) { + protected Span asSpan(Root root, TraceContext parentContext, TraceContext nonInferredParentContext) { transferMaybeChildIdsToChildIds(); Span span = parentContext.createSpan(root.getEpochMicros(this.start)) .withType("app") @@ -410,7 +411,21 @@ protected Span asSpan(Root root, TraceContext parentContext) { } span.appendToName("#"); span.appendToName(frame.getMethodName()); - span.withChildIds(childIds); + + LongList childSpanIds = null; + if (childIds != null) { + long expectedParent = nonInferredParentContext.getId().readLong(0); + childSpanIds = new LongList(childIds.getSize()); + for (int i = 0; i < childIds.getSize(); i++) { + // to avoid cycles, we only insert child-ids if the parent of the child is also + // the parent of the stack of inferred spans inserted + if (childIds.getParentId(i) == expectedParent) { + childSpanIds.add(childIds.getId(i)); + } + } + } + + span.withChildIds(childSpanIds); // we're not interested in the very bottom of the stack which contains things like accepting and handling connections if (!root.rootContext.idEquals(parentContext)) { @@ -498,19 +513,20 @@ public void resetState() { *

* * @param id the child span id to add to this call tree element + * @param parentId the parent id of the span represented by the id parameter */ - public void addMaybeChildId(long id) { + public void addMaybeChildId(long id, long parentId) { if (maybeChildIds == null) { - maybeChildIds = new LongList(); + maybeChildIds = new ChildList(); } - maybeChildIds.add(id); + maybeChildIds.add(id, parentId); } - public void addChildId(long id) { + public void addChildId(long id, long parentId) { if (childIds == null) { - childIds = new LongList(); + childIds = new ChildList(); } - childIds.add(id); + childIds.add(id, parentId); } public boolean hasChildIds() { @@ -541,7 +557,11 @@ void giveChildIdsTo(CallTree giveTo) { void giveLastChildIdTo(CallTree giveTo) { if (childIds != null && !childIds.isEmpty()) { - giveTo.addChildId(childIds.remove(childIds.getSize() - 1)); + int size = childIds.getSize(); + long id = childIds.getId(size - 1); + long parentId = childIds.getParentId(size - 1); + giveTo.addChildId(id, parentId); + childIds.removeLast(); } } @@ -619,7 +639,7 @@ public void onActivation(byte[] active, long timestamp) { long spanId = TraceContext.getSpanId(active); activeSet.add(spanId); if (!isNestedActivation(topOfStack)) { - topOfStack.addMaybeChildId(spanId); + topOfStack.addMaybeChildId(spanId, TraceContext.getParentId(active)); } } } @@ -628,12 +648,12 @@ private boolean isNestedActivation(CallTree topOfStack) { return isAnyActive(topOfStack.childIds) || isAnyActive(topOfStack.maybeChildIds); } - private boolean isAnyActive(@Nullable LongList spanIds) { + private boolean isAnyActive(@Nullable ChildList spanIds) { if (spanIds == null) { return false; } for (int i = 0, size = spanIds.getSize(); i < size; i++) { - if (activeSet.contains(spanIds.get(i))) { + if (activeSet.contains(spanIds.getId(i))) { return true; } } @@ -719,7 +739,7 @@ public int spanify() { int createdSpans = 0; List callTrees = getChildren(); for (int i = 0, size = callTrees.size(); i < size; i++) { - createdSpans += callTrees.get(i).spanify(this, rootContext); + createdSpans += callTrees.get(i).spanify(this, rootContext, rootContext); } return createdSpans; } diff --git a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/ChildList.java b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/ChildList.java new file mode 100644 index 0000000000..c4b079f9f8 --- /dev/null +++ b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/ChildList.java @@ -0,0 +1,64 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package co.elastic.apm.agent.profiler; + + +import co.elastic.apm.agent.sdk.internal.collections.LongList; + +/** List for maintaining pairs of (spanId,parentIds) both represented as longs. */ +public class ChildList { + + // this list contains the (spanId,parentIds) flattened + private final LongList idsWithParentIds = new LongList(); + + public void add(long id, long parentId) { + idsWithParentIds.add(id); + idsWithParentIds.add(parentId); + } + + public long getId(int index) { + return idsWithParentIds.get(index * 2); + } + + public long getParentId(int index) { + return idsWithParentIds.get(index * 2 + 1); + } + + public int getSize() { + return idsWithParentIds.getSize() / 2; + } + + public void addAll(ChildList other) { + idsWithParentIds.addAll(other.idsWithParentIds); + } + + public void clear() { + idsWithParentIds.clear(); + } + + public boolean isEmpty() { + return getSize() == 0; + } + + public void removeLast() { + int size = idsWithParentIds.getSize(); + idsWithParentIds.remove(size - 1); + idsWithParentIds.remove(size - 2); + } +} diff --git a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/SamplingProfiler.java b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/SamplingProfiler.java index 0c8cb3f674..860a0896de 100644 --- a/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/SamplingProfiler.java +++ b/apm-agent-plugins/apm-profiling-plugin/src/main/java/co/elastic/apm/agent/profiler/SamplingProfiler.java @@ -106,7 +106,7 @@ * and at least one stack trace. * Once {@linkplain ActivationEvent#handleDeactivationEvent(SamplingProfiler) handling the deactivation event} of the root span in a thread * (after which {@link ElasticApmTracer#getActive()} would return {@code null}), - * the {@link CallTree} is {@linkplain CallTree#spanify(CallTree.Root, TraceContext) converted into regular spans}. + * the {@link CallTree} is {@linkplain CallTree#spanify(CallTree.Root, TraceContext, TraceContext) converted into regular spans}. *

*

* Overall, the allocation rate does not depend on the number of {@link ActivationEvent}s but only on diff --git a/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeSpanifyTest.java b/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeSpanifyTest.java index aea204c257..738712e825 100644 --- a/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeSpanifyTest.java +++ b/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeSpanifyTest.java @@ -35,6 +35,7 @@ import org.stagemonitor.configuration.ConfigurationRegistry; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.Objects; import java.util.concurrent.TimeUnit; @@ -141,8 +142,62 @@ void testCallTreeWithActiveSpan() { root.spanify(); assertThat(reporter.getSpans()).hasSize(2); - assertThat(reporter.getSpans().get(1).getTraceContext().isChildOf(spanContext)); - assertThat(reporter.getSpans().get(0).getTraceContext().isChildOf(rootContext)); + assertThat(reporter.getSpans().get(0).getTraceContext().isChildOf(spanContext)).isTrue(); + assertThat(reporter.getSpans().get(1).getTraceContext().isChildOf(rootContext)).isTrue(); + } + + + @Test + void testSpanWithInvertedActivation() { + TraceContext rootContext = CallTreeTest.rootTraceContext(tracer); + + TraceContext childSpanContext = TraceContext.with64BitId(tracer); + TraceContext.fromParentContext().asChildOf(childSpanContext, rootContext); + + NoopObjectPool rootPool = NoopObjectPool.ofRecyclable(() -> new CallTree.Root(tracer)); + NoopObjectPool childPool = NoopObjectPool.ofRecyclable(CallTree::new); + + CallTree.Root root = CallTree.createRoot(rootPool, childSpanContext.serialize(), rootContext.getServiceName(), rootContext.getServiceVersion(), 0); + root.addStackTrace(tracer, Collections.singletonList(StackFrame.of("A", "a")), 10_000, childPool, 0); + + root.onActivation(rootContext.serialize(), 20_000); + root.onDeactivation(rootContext.serialize(), childSpanContext.serialize(), 30_000); + + root.addStackTrace(tracer, Collections.singletonList(StackFrame.of("A", "a")), 40_000, childPool, 0); + root.end(childPool, 0); + + root.spanify(); + + assertThat(reporter.getSpans()).hasSize(1); + assertThat(reporter.getSpans().get(0).getTraceContext().isChildOf(childSpanContext)).isTrue(); + // the inferred span should not have any span links because this + // span link would cause a cycle in the trace + assertThat(reporter.getSpans().get(0).getChildIds().getSize()).isEqualTo(0L); + } + + @Test + void testSpanWithNestedActivation() { + TraceContext rootContext = CallTreeTest.rootTraceContext(tracer); + + NoopObjectPool rootPool = NoopObjectPool.ofRecyclable(() -> new CallTree.Root(tracer)); + NoopObjectPool childPool = NoopObjectPool.ofRecyclable(CallTree::new); + + CallTree.Root root = CallTree.createRoot(rootPool, rootContext.serialize(), rootContext.getServiceName(), rootContext.getServiceVersion(), 0); + root.addStackTrace(tracer, Collections.singletonList(StackFrame.of("A", "a")), 10_000, childPool, 0); + + root.onActivation(rootContext.serialize(), 20_000); + root.onDeactivation(rootContext.serialize(), rootContext.serialize(), 30_000); + + root.addStackTrace(tracer, Collections.singletonList(StackFrame.of("A", "a")), 40_000, childPool, 0); + root.end(childPool, 0); + + root.spanify(); + + assertThat(reporter.getSpans()).hasSize(1); + assertThat(reporter.getSpans().get(0).getTraceContext().isChildOf(rootContext)).isTrue(); + // the inferred span should not have any span links because this + // span link would cause a cycle in the trace + assertThat(reporter.getSpans().get(0).getChildIds().getSize()).isEqualTo(0L); } } diff --git a/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeTest.java b/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeTest.java index b18d86ecdd..ae64d3387a 100644 --- a/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeTest.java +++ b/apm-agent-plugins/apm-profiling-plugin/src/test/java/co/elastic/apm/agent/profiler/CallTreeTest.java @@ -120,7 +120,7 @@ void testCallTree() { @Test void testGiveEmptyChildIdsTo() { CallTree rich = new CallTree(); - rich.addChildId(42); + rich.addChildId(42, 0L); CallTree robinHood = new CallTree(); CallTree poor = new CallTree();