-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[v2] Implement sanitizers to operate on OTLP data #5551
Changes from 19 commits
32db390
7af6144
add2f2b
c0a30c3
fd89e01
c2841d8
53ae6bd
0ef93f3
7997c1b
48a5229
d029e7b
cdd2633
2c6fe5c
7f0e9cc
1d46f5d
45ee0aa
3464d25
786a279
28def9c
75026c0
dab72f9
1062ba0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
// Copyright (c) 2024 The Jaeger Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package sanitizer | ||
|
||
import ( | ||
"go.opentelemetry.io/collector/pdata/ptrace" | ||
) | ||
|
||
// Replacement values that sanitizeEmptyServiceName writes to the | ||
// "service.name" resource attribute. | ||
const ( | ||
// emptyServiceName replaces a service.name whose string value is empty. | ||
emptyServiceName = "empty-service-name" | ||
// missingServiceName is stored when a resource has no service.name attribute. | ||
missingServiceName = "missing-service-name" | ||
) | ||
|
||
// NewEmptyServiceNameSanitizer returns a function that replaces empty service names | ||
// with a predefined string. | ||
func NewEmptyServiceNameSanitizer() SanitizeTraces { | ||
return sanitizeEmptyServiceName | ||
Check warning on line 19 in cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go Codecov / codecov/patchcmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go#L18-L19
|
||
} | ||
|
||
// sanitizeEmptyServiceName sanitizes the service names in the resource attributes. | ||
func sanitizeEmptyServiceName(traces ptrace.Traces) ptrace.Traces { | ||
resourceSpans := traces.ResourceSpans() | ||
for i := 0; i < resourceSpans.Len(); i++ { | ||
resourceSpan := resourceSpans.At(i) | ||
attributes := resourceSpan.Resource().Attributes() | ||
serviceNameAttr, ok := attributes.Get("service.name") | ||
if !ok { | ||
Check warning on line 29 in cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go Codecov / codecov/patchcmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go#L23-L29
|
||
// If service.name is missing, set it to nullProcessServiceName | ||
attributes.PutStr("service.name", missingServiceName) | ||
} else if serviceNameAttr.Str() == "" { | ||
Check warning on line 32 in cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go Codecov / codecov/patchcmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go#L31-L32
|
||
// If service.name is empty, replace it with serviceNameReplacement | ||
attributes.PutStr("service.name", emptyServiceName) | ||
Check warning on line 34 in cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go Codecov / codecov/patchcmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go#L34
|
||
} | ||
} | ||
return traces | ||
Check warning on line 37 in cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go Codecov / codecov/patchcmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go#L37
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
// Copyright (c) 2024 The Jaeger Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package sanitizer |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Copyright (c) 2024 The Jaeger Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package sanitizer | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/jaegertracing/jaeger/pkg/testutils" | ||
) | ||
|
||
// TestMain hands the package's test run over to testutils.VerifyGoLeaks. | ||
// NOTE(review): presumably this fails the run when tests leave goroutines | ||
// behind — confirm in pkg/testutils. | ||
func TestMain(m *testing.M) { | ||
testutils.VerifyGoLeaks(m) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
// Copyright (c) 2024 The Jaeger Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package sanitizer | ||
|
||
import ( | ||
"go.opentelemetry.io/collector/pdata/ptrace" | ||
) | ||
|
||
// SanitizeTraces is a function that performs enrichment, clean-up, or normalization of trace data. | ||
// It receives a batch of traces and returns the sanitized batch; the sanitizers | ||
// defined in this package modify their argument in place and return it. | ||
type SanitizeTraces func(traces ptrace.Traces) ptrace.Traces | ||
|
||
// NewStandardSanitizers are automatically applied by SpanProcessor. | ||
func NewStandardSanitizers() []SanitizeTraces { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you need to call this from the exporter |
||
return []SanitizeTraces{ | ||
NewEmptyServiceNameSanitizer(), | ||
NewUTF8Sanitizer(), | ||
} | ||
} | ||
|
||
// NewChainedSanitizer creates a Sanitizer from the variadic list of passed Sanitizers. | ||
// If the list only has one element, it is returned directly to minimize indirection. | ||
func NewChainedSanitizer(sanitizers ...SanitizeTraces) SanitizeTraces { | ||
if len(sanitizers) == 1 { | ||
return sanitizers[0] | ||
} | ||
return func(traces ptrace.Traces) ptrace.Traces { | ||
for _, s := range sanitizers { | ||
traces = s(traces) | ||
} | ||
return traces | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
// Copyright (c) 2024 The Jaeger Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package sanitizer | ||
|
||
import ( | ||
"unicode/utf8" | ||
|
||
"go.opentelemetry.io/collector/pdata/pcommon" | ||
"go.opentelemetry.io/collector/pdata/ptrace" | ||
) | ||
|
||
// Placeholders used by the UTF-8 sanitizer. | ||
const ( | ||
// invalidOperation replaces a span name that is not valid UTF-8. | ||
invalidOperation = "InvalidOperationName" | ||
// invalidTagKey is the placeholder sanitizeAttributes uses for attributes | ||
// that fail UTF-8 validation. | ||
invalidTagKey = "InvalidTagKey" | ||
// badUTF8Prefix prefixes the keys of bytes attributes that preserve the | ||
// original, invalid data. | ||
badUTF8Prefix = "bad_utf8_" | ||
) | ||
|
||
// NewUTF8Sanitizer creates a UTF8 sanitizer. | ||
func NewUTF8Sanitizer() SanitizeTraces { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please add a test |
||
return sanitizeUTF8 | ||
} | ||
|
||
// sanitizeUTF8 sanitizes the UTF8 in the traces. | ||
func sanitizeUTF8(traces ptrace.Traces) ptrace.Traces { | ||
resourceSpans := traces.ResourceSpans() | ||
for i := 0; i < resourceSpans.Len(); i++ { | ||
resourceSpan := resourceSpans.At(i) | ||
|
||
// Sanitize resource attributes | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comments like this provide no value |
||
sanitizeAttributes(resourceSpan.Resource().Attributes()) | ||
|
||
scopeSpans := resourceSpan.ScopeSpans() | ||
for j := 0; j < scopeSpans.Len(); j++ { | ||
scopeSpan := scopeSpans.At(j) | ||
|
||
// Sanitize scope attributes | ||
sanitizeAttributes(scopeSpan.Scope().Attributes()) | ||
|
||
spans := scopeSpan.Spans() | ||
for k := 0; k < spans.Len(); k++ { | ||
span := spans.At(k) | ||
|
||
// Sanitize operation name | ||
if !utf8.ValidString(span.Name()) { | ||
originalName := span.Name() | ||
span.SetName(invalidOperation) | ||
binaryAttr := span.Attributes().PutEmptyBytes(badUTF8Prefix + "operation_name") | ||
binaryAttr.FromRaw([]byte(originalName)) | ||
} | ||
|
||
// Sanitize span attributes | ||
sanitizeAttributes(span.Attributes()) | ||
} | ||
} | ||
} | ||
return traces | ||
} | ||
|
||
// sanitizeAttributes sanitizes attributes to ensure UTF8 validity. | ||
func sanitizeAttributes(attributes pcommon.Map) { | ||
// Collect invalid keys and values during iteration | ||
var invalidKeys []string | ||
invalidValues := make(map[string]string) | ||
|
||
attributes.Range(func(k string, v pcommon.Value) bool { | ||
// Handle invalid UTF-8 in attribute keys | ||
if !utf8.ValidString(k) { | ||
invalidKeys = append(invalidKeys, k) | ||
} | ||
// Handle invalid UTF-8 in attribute values | ||
if v.Type() == pcommon.ValueTypeStr && !utf8.ValidString(v.Str()) { | ||
invalidValues[k] = v.Str() | ||
} | ||
return true | ||
}) | ||
|
||
// Apply collected changes after iteration | ||
for _, k := range invalidKeys { | ||
originalKey := k | ||
attributes.PutStr(invalidTagKey, k) | ||
binaryAttr := attributes.PutEmptyBytes(badUTF8Prefix + originalKey) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if the originalKey is a bad UTF8 string, you are still creating a new key with the same bad string |
||
binaryAttr.FromRaw([]byte(originalKey)) | ||
} | ||
|
||
for k, v := range invalidValues { | ||
originalValue := v | ||
attributes.PutStr(k, invalidTagKey) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what does this achieve? I suggest you write a specification in English in the comments to the sanitizer what exactly it's trying to do with attributes. And I think starting with the unit tests in this case would be much more useful, because you can see exactly how input and output data are related. For example, I expect the following to be true:
|
||
binaryAttr := attributes.PutEmptyBytes(badUTF8Prefix + k) | ||
binaryAttr.FromRaw([]byte(originalValue)) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
better to import semantic conventions and use a constant from there