From 32db39043b8483ab18e7df550cf3b44009af0d2e Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Mon, 10 Jun 2024 13:13:20 +0530 Subject: [PATCH 01/16] intial configuration for v2 sanitizers Signed-off-by: Vamshi Maskuri --- cmd/collector/app/sanitizer_v2/empty.go | 33 ++++++++++++ cmd/collector/app/sanitizer_v2/sanitizer.go | 28 ++++++++++ .../sanitizer_v2/service_name_sanitizer.go | 44 +++++++++++++++ .../app/sanitizer_v2/utf8_sanitizer.go | 54 +++++++++++++++++++ 4 files changed, 159 insertions(+) create mode 100644 cmd/collector/app/sanitizer_v2/empty.go create mode 100644 cmd/collector/app/sanitizer_v2/sanitizer.go create mode 100644 cmd/collector/app/sanitizer_v2/service_name_sanitizer.go create mode 100644 cmd/collector/app/sanitizer_v2/utf8_sanitizer.go diff --git a/cmd/collector/app/sanitizer_v2/empty.go b/cmd/collector/app/sanitizer_v2/empty.go new file mode 100644 index 00000000000..005a6a49588 --- /dev/null +++ b/cmd/collector/app/sanitizer_v2/empty.go @@ -0,0 +1,33 @@ +package sanitizer_v2 + +import ( + "go.opentelemetry.io/collector/pdata/ptrace" +) + +// Constants for the replacement names +const ( + serviceNameReplacement = "empty-service-name" + nullProcessServiceName = "null-process-and-service-name" +) + +// NewEmptyServiceNameSanitizer returns a function that replaces empty service names +// with a predefined string. +func NewEmptyServiceNameSanitizer() SanitizeSpan { + return sanitizeEmptyServiceName +} + +// sanitizeEmptyServiceName sanitizes the service names in the span attributes. +func sanitizeEmptyServiceName(span ptrace.Span) ptrace.Span { + attributes := span.Attributes() + serviceNameAttr, ok := attributes.Get("service.name") + + if !ok { + // If service.name is missing, set it to nullProcessServiceName + attributes.PutStr("service.name", nullProcessServiceName) + } else if serviceNameAttr.Str() == "" { + // If service.name is empty, replace it with serviceNameReplacement + attributes.PutStr("service.name", serviceNameReplacement) + } + + return span +} diff --git a/cmd/collector/app/sanitizer_v2/sanitizer.go b/cmd/collector/app/sanitizer_v2/sanitizer.go new file mode 100644 index 00000000000..6d04a05853c --- /dev/null +++ b/cmd/collector/app/sanitizer_v2/sanitizer.go @@ -0,0 +1,28 @@ +package sanitizer_v2 + +import "go.opentelemetry.io/collector/pdata/ptrace" + +// SanitizeSpan sanitizes/normalizes spans. Any business logic that needs to be applied to normalize the contents of a +// span should implement this interface. +type SanitizeSpan func(span ptrace.Span) ptrace.Span + +// NewStandardSanitizers are automatically applied by SpanProcessor. +func NewStandardSanitizers() []SanitizeSpan { + return []SanitizeSpan{ + NewEmptyServiceNameSanitizer(), + } +} + +// NewChainedSanitizer creates a Sanitizer from the variadic list of passed Sanitizers. +// If the list only has one element, it is returned directly to minimize indirection. +func NewChainedSanitizer(sanitizers ...SanitizeSpan) SanitizeSpan { + if len(sanitizers) == 1 { + return sanitizers[0] + } + return func(span ptrace.Span) ptrace.Span { + for _, s := range sanitizers { + span = s(span) + } + return span + } +} diff --git a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go b/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go new file mode 100644 index 00000000000..5f0b6cd22f4 --- /dev/null +++ b/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go @@ -0,0 +1,44 @@ +package sanitizer_v2 + +import ( + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" +) + +// Cache interface similar to the one in V1 +type Cache interface { + Get(alias string) string + IsEmpty() bool +} + +// NewServiceNameSanitizer creates a service name sanitizer with a given cache. +func NewServiceNameSanitizer(cache Cache) SanitizeSpan { + sanitizer := serviceNameSanitizer{cache: cache} + return sanitizer.Sanitize +} + +// serviceNameSanitizer sanitizes the service names in span annotations given a source of truth alias to service cache. +type serviceNameSanitizer struct { + cache Cache +} + +// Sanitize sanitizes the service names in the span annotations. +func (s serviceNameSanitizer) Sanitize(span ptrace.Span) ptrace.Span { + if s.cache.IsEmpty() { + return span + } + + attributes := span.Attributes() + serviceNameAttr, exists := attributes.Get("service.name") + if !exists || serviceNameAttr.Type() != pcommon.ValueTypeStr { + return span + } + + alias := serviceNameAttr.Str() + serviceName := s.cache.Get(alias) + if serviceName != "" { + attributes.PutStr("service.name", serviceName) + } + + return span +} diff --git a/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go b/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go new file mode 100644 index 00000000000..fd315723579 --- /dev/null +++ b/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go @@ -0,0 +1,54 @@ +package sanitizer_v2 + +import ( + "fmt" + "unicode/utf8" + + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/ptrace" + "go.uber.org/zap" +) + +const ( + invalidOperation = "InvalidOperationName" + invalidService = "InvalidServiceName" + invalidTagKey = "InvalidTagKey" +) + +// UTF8Sanitizer sanitizes all strings in spans. +type UTF8Sanitizer struct { + logger *zap.Logger +} + +// NewUTF8Sanitizer creates a UTF8 sanitizer with logging functionality. +func NewUTF8Sanitizer(logger *zap.Logger) SanitizeSpan { + return UTF8Sanitizer{logger: logger}.Sanitize +} + +// Sanitize sanitizes the UTF8 in the spans. +func (s UTF8Sanitizer) Sanitize(span ptrace.Span) ptrace.Span { + if !utf8.ValidString(span.Name()) { + s.logger.Info("Invalid utf8 operation name", zap.String("operation_name", span.Name())) + span.SetName(invalidOperation) + } + + attributes := span.Attributes() + serviceNameAttr, ok := attributes.Get("service.name") + if ok && !utf8.ValidString(serviceNameAttr.Str()) { + s.logger.Info("Invalid utf8 service name", zap.String("service_name", serviceNameAttr.Str())) + attributes.PutStr("service.name", invalidService) + } + + sanitizeAttributes(attributes) + return span +} + +// sanitizeAttributes sanitizes attributes to ensure UTF8 validity. +func sanitizeAttributes(attributes pcommon.Map) { + attributes.Range(func(k string, v pcommon.Value) bool { + if v.Type() == pcommon.ValueTypeStr && !utf8.ValidString(v.Str()) { + attributes.PutStr(k, fmt.Sprintf("%s:%s", k, v.Str())) + } + return true + }) +} From 7af6144776775bf0c8f3fd73dd6ca4f4a5835d0b Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Mon, 10 Jun 2024 13:30:10 +0530 Subject: [PATCH 02/16] fix lint Signed-off-by: Vamshi Maskuri --- .../sanitizer_v2/{empty.go => empty_service_name_sanitizer.go} | 3 +++ .../app/sanitizer_v2/empty_service_name_sanitizer_test.go | 1 + cmd/collector/app/sanitizer_v2/sanitizer.go | 3 +++ cmd/collector/app/sanitizer_v2/service_name_sanitizer.go | 3 +++ cmd/collector/app/sanitizer_v2/utf8_sanitizer.go | 3 +++ 5 files changed, 13 insertions(+) rename cmd/collector/app/sanitizer_v2/{empty.go => empty_service_name_sanitizer.go} (92%) create mode 100644 cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go diff --git a/cmd/collector/app/sanitizer_v2/empty.go b/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go similarity index 92% rename from cmd/collector/app/sanitizer_v2/empty.go rename to cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go index 005a6a49588..f34d1a35862 100644 --- a/cmd/collector/app/sanitizer_v2/empty.go +++ b/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go @@ -1,3 +1,6 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + package sanitizer_v2 import ( diff --git a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go b/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go new file mode 100644 index 00000000000..786c8c21329 --- /dev/null +++ b/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go @@ -0,0 +1 @@ +package sanitizer_v2 \ No newline at end of file diff --git a/cmd/collector/app/sanitizer_v2/sanitizer.go b/cmd/collector/app/sanitizer_v2/sanitizer.go index 6d04a05853c..ad9c8f56536 100644 --- a/cmd/collector/app/sanitizer_v2/sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/sanitizer.go @@ -1,3 +1,6 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + package sanitizer_v2 import "go.opentelemetry.io/collector/pdata/ptrace" diff --git a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go b/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go index 5f0b6cd22f4..786b77cc991 100644 --- a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go @@ -1,3 +1,6 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + package sanitizer_v2 import ( diff --git a/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go b/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go index fd315723579..87d1afda1aa 100644 --- a/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go @@ -1,3 +1,6 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + package sanitizer_v2 import ( From add2f2b6b6e60afd4bcc7415802a4440c6e66fee Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Mon, 10 Jun 2024 13:36:51 +0530 Subject: [PATCH 03/16] fix lint Signed-off-by: Vamshi Maskuri --- .../empty_service_name_sanitizer_test.go | 5 ++++- cmd/collector/app/sanitizer_v2/package_test.go | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 cmd/collector/app/sanitizer_v2/package_test.go diff --git a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go b/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go index 786c8c21329..26bc0a7aa33 100644 --- a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go +++ b/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go @@ -1 +1,4 @@ -package sanitizer_v2 \ No newline at end of file +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package sanitizer_v2 diff --git a/cmd/collector/app/sanitizer_v2/package_test.go b/cmd/collector/app/sanitizer_v2/package_test.go new file mode 100644 index 00000000000..8936e347822 --- /dev/null +++ b/cmd/collector/app/sanitizer_v2/package_test.go @@ -0,0 +1,14 @@ +// Copyright (c) 2024 The Jaeger Authors. +// SPDX-License-Identifier: Apache-2.0 + +package sanitizer_v2 + +import ( + "testing" + + "github.com/jaegertracing/jaeger/pkg/testutils" +) + +func TestMain(m *testing.M) { + testutils.VerifyGoLeaks(m) +} From c0a30c32adac5f8bdc317507932978f87ba310c9 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Mon, 10 Jun 2024 23:55:27 +0530 Subject: [PATCH 04/16] remove unrelated code Signed-off-by: Vamshi Maskuri --- cmd/collector/app/sanitizer_v2/sanitizer.go | 9 +++++++-- cmd/collector/app/sanitizer_v2/service_name_sanitizer.go | 6 ------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cmd/collector/app/sanitizer_v2/sanitizer.go b/cmd/collector/app/sanitizer_v2/sanitizer.go index ad9c8f56536..a02ed57a5a9 100644 --- a/cmd/collector/app/sanitizer_v2/sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/sanitizer.go @@ -3,16 +3,21 @@ package sanitizer_v2 -import "go.opentelemetry.io/collector/pdata/ptrace" +import ( + "go.opentelemetry.io/collector/pdata/ptrace" + "go.uber.org/zap" +) // SanitizeSpan sanitizes/normalizes spans. Any business logic that needs to be applied to normalize the contents of a // span should implement this interface. type SanitizeSpan func(span ptrace.Span) ptrace.Span // NewStandardSanitizers are automatically applied by SpanProcessor. -func NewStandardSanitizers() []SanitizeSpan { +func NewStandardSanitizers(logger *zap.Logger, cache Cache) []SanitizeSpan { return []SanitizeSpan{ NewEmptyServiceNameSanitizer(), + NewUTF8Sanitizer(logger), + serviceNameSanitizer{cache: cache}.Sanitize, } } diff --git a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go b/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go index 786b77cc991..80f102fa558 100644 --- a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go @@ -14,12 +14,6 @@ type Cache interface { IsEmpty() bool } -// NewServiceNameSanitizer creates a service name sanitizer with a given cache. -func NewServiceNameSanitizer(cache Cache) SanitizeSpan { - sanitizer := serviceNameSanitizer{cache: cache} - return sanitizer.Sanitize -} - // serviceNameSanitizer sanitizes the service names in span annotations given a source of truth alias to service cache. type serviceNameSanitizer struct { cache Cache From fd89e0193ed12b10c49064e9767d72eb9f8085fa Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Tue, 11 Jun 2024 08:24:41 +0530 Subject: [PATCH 05/16] updated sanitizers with ptrace.Traces Signed-off-by: Vamshi Maskuri --- .../empty_service_name_sanitizer.go | 30 +++++++------- cmd/collector/app/sanitizer_v2/sanitizer.go | 14 +++---- .../sanitizer_v2/service_name_sanitizer.go | 30 +++++++------- .../app/sanitizer_v2/utf8_sanitizer.go | 39 ++++++++++++------- 4 files changed, 65 insertions(+), 48 deletions(-) diff --git a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go b/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go index f34d1a35862..3a35eb35d84 100644 --- a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go @@ -15,22 +15,24 @@ const ( // NewEmptyServiceNameSanitizer returns a function that replaces empty service names // with a predefined string. -func NewEmptyServiceNameSanitizer() SanitizeSpan { +func NewEmptyServiceNameSanitizer() SanitizeTraces { return sanitizeEmptyServiceName } -// sanitizeEmptyServiceName sanitizes the service names in the span attributes. -func sanitizeEmptyServiceName(span ptrace.Span) ptrace.Span { - attributes := span.Attributes() - serviceNameAttr, ok := attributes.Get("service.name") - - if !ok { - // If service.name is missing, set it to nullProcessServiceName - attributes.PutStr("service.name", nullProcessServiceName) - } else if serviceNameAttr.Str() == "" { - // If service.name is empty, replace it with serviceNameReplacement - attributes.PutStr("service.name", serviceNameReplacement) +// sanitizeEmptyServiceName sanitizes the service names in the resource attributes. +func sanitizeEmptyServiceName(traces ptrace.Traces) ptrace.Traces { + resourceSpans := traces.ResourceSpans() + for i := 0; i < resourceSpans.Len(); i++ { + resourceSpan := resourceSpans.At(i) + attributes := resourceSpan.Resource().Attributes() + serviceNameAttr, ok := attributes.Get("service.name") + if !ok { + // If service.name is missing, set it to nullProcessServiceName + attributes.PutStr("service.name", nullProcessServiceName) + } else if serviceNameAttr.Str() == "" { + // If service.name is empty, replace it with serviceNameReplacement + attributes.PutStr("service.name", serviceNameReplacement) + } } - - return span + return traces } diff --git a/cmd/collector/app/sanitizer_v2/sanitizer.go b/cmd/collector/app/sanitizer_v2/sanitizer.go index a02ed57a5a9..50ac7352e13 100644 --- a/cmd/collector/app/sanitizer_v2/sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/sanitizer.go @@ -10,11 +10,11 @@ import ( // SanitizeSpan sanitizes/normalizes spans. Any business logic that needs to be applied to normalize the contents of a // span should implement this interface. -type SanitizeSpan func(span ptrace.Span) ptrace.Span +type SanitizeTraces func(traces ptrace.Traces) ptrace.Traces // NewStandardSanitizers are automatically applied by SpanProcessor. -func NewStandardSanitizers(logger *zap.Logger, cache Cache) []SanitizeSpan { - return []SanitizeSpan{ +func NewStandardSanitizers(logger *zap.Logger, cache Cache) []SanitizeTraces { + return []SanitizeTraces{ NewEmptyServiceNameSanitizer(), NewUTF8Sanitizer(logger), serviceNameSanitizer{cache: cache}.Sanitize, @@ -23,14 +23,14 @@ func NewStandardSanitizers(logger *zap.Logger, cache Cache) []SanitizeSpan { // NewChainedSanitizer creates a Sanitizer from the variadic list of passed Sanitizers. // If the list only has one element, it is returned directly to minimize indirection. -func NewChainedSanitizer(sanitizers ...SanitizeSpan) SanitizeSpan { +func NewChainedSanitizer(sanitizers ...SanitizeTraces) SanitizeTraces { if len(sanitizers) == 1 { return sanitizers[0] } - return func(span ptrace.Span) ptrace.Span { + return func(traces ptrace.Traces) ptrace.Traces { for _, s := range sanitizers { - span = s(span) + traces = s(traces) } - return span + return traces } } diff --git a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go b/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go index 80f102fa558..3d5ad362c5b 100644 --- a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go @@ -20,22 +20,26 @@ type serviceNameSanitizer struct { } // Sanitize sanitizes the service names in the span annotations. -func (s serviceNameSanitizer) Sanitize(span ptrace.Span) ptrace.Span { +func (s serviceNameSanitizer) Sanitize(traces ptrace.Traces) ptrace.Traces { if s.cache.IsEmpty() { - return span + return traces } - attributes := span.Attributes() - serviceNameAttr, exists := attributes.Get("service.name") - if !exists || serviceNameAttr.Type() != pcommon.ValueTypeStr { - return span + resourceSpans := traces.ResourceSpans() + for i := 0; i < resourceSpans.Len(); i++ { + resourceSpan := resourceSpans.At(i) + attributes := resourceSpan.Resource().Attributes() + serviceNameAttr, exists := attributes.Get("service.name") + if !exists || serviceNameAttr.Type() != pcommon.ValueTypeStr { + continue + } + + alias := serviceNameAttr.Str() + serviceName := s.cache.Get(alias) + if serviceName != "" { + attributes.PutStr("service.name", serviceName) + } } - alias := serviceNameAttr.Str() - serviceName := s.cache.Get(alias) - if serviceName != "" { - attributes.PutStr("service.name", serviceName) - } - - return span + return traces } diff --git a/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go b/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go index 87d1afda1aa..57605bc78f6 100644 --- a/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go +++ b/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go @@ -24,26 +24,37 @@ type UTF8Sanitizer struct { } // NewUTF8Sanitizer creates a UTF8 sanitizer with logging functionality. -func NewUTF8Sanitizer(logger *zap.Logger) SanitizeSpan { +func NewUTF8Sanitizer(logger *zap.Logger) SanitizeTraces { return UTF8Sanitizer{logger: logger}.Sanitize } // Sanitize sanitizes the UTF8 in the spans. -func (s UTF8Sanitizer) Sanitize(span ptrace.Span) ptrace.Span { - if !utf8.ValidString(span.Name()) { - s.logger.Info("Invalid utf8 operation name", zap.String("operation_name", span.Name())) - span.SetName(invalidOperation) - } +func (s UTF8Sanitizer) Sanitize(traces ptrace.Traces) ptrace.Traces { + resourceSpans := traces.ResourceSpans() + for i := 0; i < resourceSpans.Len(); i++ { + resourceSpan := resourceSpans.At(i) + scopeSpans := resourceSpan.ScopeSpans() + for j := 0; j < scopeSpans.Len(); j++ { + spans := scopeSpans.At(j).Spans() + for k := 0; k < spans.Len(); k++ { + span := spans.At(k) + if !utf8.ValidString(span.Name()) { + s.logger.Info("Invalid utf8 operation name", zap.String("operation_name", span.Name())) + span.SetName(invalidOperation) + } - attributes := span.Attributes() - serviceNameAttr, ok := attributes.Get("service.name") - if ok && !utf8.ValidString(serviceNameAttr.Str()) { - s.logger.Info("Invalid utf8 service name", zap.String("service_name", serviceNameAttr.Str())) - attributes.PutStr("service.name", invalidService) - } + attributes := span.Attributes() + serviceNameAttr, ok := attributes.Get("service.name") + if ok && !utf8.ValidString(serviceNameAttr.Str()) { + s.logger.Info("Invalid utf8 service name", zap.String("service_name", serviceNameAttr.Str())) + attributes.PutStr("service.name", invalidService) + } - sanitizeAttributes(attributes) - return span + sanitizeAttributes(attributes) + } + } + } + return traces } // sanitizeAttributes sanitizes attributes to ensure UTF8 validity. From 53ae6bd5675631f620f9f187569be99bb02f171b Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 07:26:25 +0530 Subject: [PATCH 06/16] rename directory Signed-off-by: Vamshi Maskuri --- .../storageexporter/sanitizer}/empty_service_name_sanitizer.go | 2 +- .../sanitizer}/empty_service_name_sanitizer_test.go | 2 +- .../exporters/storageexporter/sanitizer}/package_test.go | 2 +- .../internal/exporters/storageexporter/sanitizer}/sanitizer.go | 2 +- .../storageexporter/sanitizer}/service_name_sanitizer.go | 2 +- .../exporters/storageexporter/sanitizer}/utf8_sanitizer.go | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) rename cmd/{collector/app/sanitizer_v2 => jaeger/internal/exporters/storageexporter/sanitizer}/empty_service_name_sanitizer.go (98%) rename cmd/{collector/app/sanitizer_v2 => jaeger/internal/exporters/storageexporter/sanitizer}/empty_service_name_sanitizer_test.go (79%) rename cmd/{collector/app/sanitizer_v2 => jaeger/internal/exporters/storageexporter/sanitizer}/package_test.go (91%) rename cmd/{collector/app/sanitizer_v2 => jaeger/internal/exporters/storageexporter/sanitizer}/sanitizer.go (98%) rename cmd/{collector/app/sanitizer_v2 => jaeger/internal/exporters/storageexporter/sanitizer}/service_name_sanitizer.go (98%) rename cmd/{collector/app/sanitizer_v2 => jaeger/internal/exporters/storageexporter/sanitizer}/utf8_sanitizer.go (98%) diff --git a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go similarity index 98% rename from cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go rename to cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go index 3a35eb35d84..7afc9486f99 100644 --- a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go @@ -1,7 +1,7 @@ // Copyright (c) 2024 The Jaeger Authors. // SPDX-License-Identifier: Apache-2.0 -package sanitizer_v2 +package sanitizer import ( "go.opentelemetry.io/collector/pdata/ptrace" diff --git a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer_test.go similarity index 79% rename from cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go rename to cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer_test.go index 26bc0a7aa33..a6140c0f47a 100644 --- a/cmd/collector/app/sanitizer_v2/empty_service_name_sanitizer_test.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer_test.go @@ -1,4 +1,4 @@ // Copyright (c) 2024 The Jaeger Authors. // SPDX-License-Identifier: Apache-2.0 -package sanitizer_v2 +package sanitizer diff --git a/cmd/collector/app/sanitizer_v2/package_test.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/package_test.go similarity index 91% rename from cmd/collector/app/sanitizer_v2/package_test.go rename to cmd/jaeger/internal/exporters/storageexporter/sanitizer/package_test.go index 8936e347822..a7d22934b92 100644 --- a/cmd/collector/app/sanitizer_v2/package_test.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/package_test.go @@ -1,7 +1,7 @@ // Copyright (c) 2024 The Jaeger Authors. // SPDX-License-Identifier: Apache-2.0 -package sanitizer_v2 +package sanitizer import ( "testing" diff --git a/cmd/collector/app/sanitizer_v2/sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go similarity index 98% rename from cmd/collector/app/sanitizer_v2/sanitizer.go rename to cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go index 50ac7352e13..d8310bb0a04 100644 --- a/cmd/collector/app/sanitizer_v2/sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go @@ -1,7 +1,7 @@ // Copyright (c) 2024 The Jaeger Authors. // SPDX-License-Identifier: Apache-2.0 -package sanitizer_v2 +package sanitizer import ( "go.opentelemetry.io/collector/pdata/ptrace" diff --git a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/service_name_sanitizer.go similarity index 98% rename from cmd/collector/app/sanitizer_v2/service_name_sanitizer.go rename to cmd/jaeger/internal/exporters/storageexporter/sanitizer/service_name_sanitizer.go index 3d5ad362c5b..263de5342cd 100644 --- a/cmd/collector/app/sanitizer_v2/service_name_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/service_name_sanitizer.go @@ -1,7 +1,7 @@ // Copyright (c) 2024 The Jaeger Authors. // SPDX-License-Identifier: Apache-2.0 -package sanitizer_v2 +package sanitizer import ( "go.opentelemetry.io/collector/pdata/pcommon" diff --git a/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go similarity index 98% rename from cmd/collector/app/sanitizer_v2/utf8_sanitizer.go rename to cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go index 57605bc78f6..92077f54b2c 100644 --- a/cmd/collector/app/sanitizer_v2/utf8_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go @@ -1,7 +1,7 @@ // Copyright (c) 2024 The Jaeger Authors. // SPDX-License-Identifier: Apache-2.0 -package sanitizer_v2 +package sanitizer import ( "fmt" From 0ef93f3fc957807eca5034c2ea5d341058a4340b Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 07:32:20 +0530 Subject: [PATCH 07/16] remove service-name-sanitizer in v2 Signed-off-by: Vamshi Maskuri --- .../storageexporter/sanitizer/sanitizer.go | 3 +- .../sanitizer/service_name_sanitizer.go | 45 ------------------- 2 files changed, 1 insertion(+), 47 deletions(-) delete mode 100644 cmd/jaeger/internal/exporters/storageexporter/sanitizer/service_name_sanitizer.go diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go index d8310bb0a04..8cc18432824 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go @@ -13,11 +13,10 @@ import ( type SanitizeTraces func(traces ptrace.Traces) ptrace.Traces // NewStandardSanitizers are automatically applied by SpanProcessor. -func NewStandardSanitizers(logger *zap.Logger, cache Cache) []SanitizeTraces { +func NewStandardSanitizers(logger *zap.Logger) []SanitizeTraces { return []SanitizeTraces{ NewEmptyServiceNameSanitizer(), NewUTF8Sanitizer(logger), - serviceNameSanitizer{cache: cache}.Sanitize, } } diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/service_name_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/service_name_sanitizer.go deleted file mode 100644 index 263de5342cd..00000000000 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/service_name_sanitizer.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2024 The Jaeger Authors. -// SPDX-License-Identifier: Apache-2.0 - -package sanitizer - -import ( - "go.opentelemetry.io/collector/pdata/pcommon" - "go.opentelemetry.io/collector/pdata/ptrace" -) - -// Cache interface similar to the one in V1 -type Cache interface { - Get(alias string) string - IsEmpty() bool -} - -// serviceNameSanitizer sanitizes the service names in span annotations given a source of truth alias to service cache. -type serviceNameSanitizer struct { - cache Cache -} - -// Sanitize sanitizes the service names in the span annotations. -func (s serviceNameSanitizer) Sanitize(traces ptrace.Traces) ptrace.Traces { - if s.cache.IsEmpty() { - return traces - } - - resourceSpans := traces.ResourceSpans() - for i := 0; i < resourceSpans.Len(); i++ { - resourceSpan := resourceSpans.At(i) - attributes := resourceSpan.Resource().Attributes() - serviceNameAttr, exists := attributes.Get("service.name") - if !exists || serviceNameAttr.Type() != pcommon.ValueTypeStr { - continue - } - - alias := serviceNameAttr.Str() - serviceName := s.cache.Get(alias) - if serviceName != "" { - attributes.PutStr("service.name", serviceName) - } - } - - return traces -} From 7997c1b1ea0af3a730c85450d6f12f03c3d10cf7 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 07:34:52 +0530 Subject: [PATCH 08/16] add compatibility section Signed-off-by: Vamshi Maskuri --- cmd/jaeger/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cmd/jaeger/README.md b/cmd/jaeger/README.md index e09ae8f537e..81cf5961a24 100644 --- a/cmd/jaeger/README.md +++ b/cmd/jaeger/README.md @@ -2,3 +2,9 @@ This is experimental Jaeger V2 based on OpenTelemetry collector. See https://github.com/jaegertracing/jaeger/issues/4843. + +## Compatibility + +### Service Name Sanitizer + +In v1, there was a `serviceNameSanitizer` that sanitized the service names in span annotations using a source of truth alias to service cache. This functionality has been removed in v2. If your implementation relies on this sanitizer, you will need to find a different way to integrate this functionality, such as implementing a custom processor. \ No newline at end of file From 48a5229477d33aa9d5bbeadb397268f54db1cfd5 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 07:54:18 +0530 Subject: [PATCH 09/16] add badUTF8 prefix Signed-off-by: Vamshi Maskuri --- .../storageexporter/sanitizer/sanitizer.go | 5 +-- .../sanitizer/utf8_sanitizer.go | 38 +++++++++++-------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go index 8cc18432824..748c46a9965 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go @@ -5,7 +5,6 @@ package sanitizer import ( "go.opentelemetry.io/collector/pdata/ptrace" - "go.uber.org/zap" ) // SanitizeSpan sanitizes/normalizes spans. Any business logic that needs to be applied to normalize the contents of a @@ -13,10 +12,10 @@ import ( type SanitizeTraces func(traces ptrace.Traces) ptrace.Traces // NewStandardSanitizers are automatically applied by SpanProcessor. -func NewStandardSanitizers(logger *zap.Logger) []SanitizeTraces { +func NewStandardSanitizers() []SanitizeTraces { return []SanitizeTraces{ NewEmptyServiceNameSanitizer(), - NewUTF8Sanitizer(logger), + NewUTF8Sanitizer(), } } diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go index 92077f54b2c..19e6f45715b 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go @@ -4,32 +4,26 @@ package sanitizer import ( - "fmt" "unicode/utf8" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/ptrace" - "go.uber.org/zap" ) const ( invalidOperation = "InvalidOperationName" invalidService = "InvalidServiceName" invalidTagKey = "InvalidTagKey" + badUTF8Prefix = "bad_utf8_" ) -// UTF8Sanitizer sanitizes all strings in spans. -type UTF8Sanitizer struct { - logger *zap.Logger +// NewUTF8Sanitizer creates a UTF8 sanitizer. +func NewUTF8Sanitizer() SanitizeTraces { + return sanitizeUF8 } -// NewUTF8Sanitizer creates a UTF8 sanitizer with logging functionality. -func NewUTF8Sanitizer(logger *zap.Logger) SanitizeTraces { - return UTF8Sanitizer{logger: logger}.Sanitize -} - -// Sanitize sanitizes the UTF8 in the spans. -func (s UTF8Sanitizer) Sanitize(traces ptrace.Traces) ptrace.Traces { +// sanitizeUTF8 sanitizes the UTF8 in the spans. +func sanitizeUF8(traces ptrace.Traces) ptrace.Traces { resourceSpans := traces.ResourceSpans() for i := 0; i < resourceSpans.Len(); i++ { resourceSpan := resourceSpans.At(i) @@ -39,15 +33,19 @@ func (s UTF8Sanitizer) Sanitize(traces ptrace.Traces) ptrace.Traces { for k := 0; k < spans.Len(); k++ { span := spans.At(k) if !utf8.ValidString(span.Name()) { - s.logger.Info("Invalid utf8 operation name", zap.String("operation_name", span.Name())) + originalName := span.Name() span.SetName(invalidOperation) + byteSlice := span.Attributes().PutEmptyBytes(badUTF8Prefix + "operation_name") + byteSlice.FromRaw([]byte(originalName)) } attributes := span.Attributes() serviceNameAttr, ok := attributes.Get("service.name") if ok && !utf8.ValidString(serviceNameAttr.Str()) { - s.logger.Info("Invalid utf8 service name", zap.String("service_name", serviceNameAttr.Str())) + originalServiceName := serviceNameAttr.Str() attributes.PutStr("service.name", invalidService) + byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + "service.name") + byteSlice.FromRaw([]byte(originalServiceName)) } sanitizeAttributes(attributes) @@ -60,8 +58,16 @@ func (s UTF8Sanitizer) Sanitize(traces ptrace.Traces) ptrace.Traces { // sanitizeAttributes sanitizes attributes to ensure UTF8 validity. func sanitizeAttributes(attributes pcommon.Map) { attributes.Range(func(k string, v pcommon.Value) bool { - if v.Type() == pcommon.ValueTypeStr && !utf8.ValidString(v.Str()) { - attributes.PutStr(k, fmt.Sprintf("%s:%s", k, v.Str())) + if !utf8.ValidString(k) { + originalKey := k + k = invalidTagKey + byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + originalKey) + byteSlice.FromRaw([]byte(originalKey)) + } else if v.Type() == pcommon.ValueTypeStr && !utf8.ValidString(v.Str()) { + originalValue := v.Str() + attributes.PutStr(k, invalidTagKey) + byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + k) + byteSlice.FromRaw([]byte(originalValue)) } return true }) From cdd2633cc926dbe3de2040d701a99ff12ab6eeba Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 08:07:50 +0530 Subject: [PATCH 10/16] fix lint Signed-off-by: Vamshi Maskuri --- .../exporters/storageexporter/sanitizer/utf8_sanitizer.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go index 19e6f45715b..ff0c6fd3c23 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go @@ -32,6 +32,7 @@ func sanitizeUF8(traces ptrace.Traces) ptrace.Traces { spans := scopeSpans.At(j).Spans() for k := 0; k < spans.Len(); k++ { span := spans.At(k) + // Sanitize operation name if !utf8.ValidString(span.Name()) { originalName := span.Name() span.SetName(invalidOperation) @@ -39,6 +40,7 @@ func sanitizeUF8(traces ptrace.Traces) ptrace.Traces { byteSlice.FromRaw([]byte(originalName)) } + // Sanitize service name attribute attributes := span.Attributes() serviceNameAttr, ok := attributes.Get("service.name") if ok && !utf8.ValidString(serviceNameAttr.Str()) { @@ -58,12 +60,14 @@ func sanitizeUF8(traces ptrace.Traces) ptrace.Traces { // sanitizeAttributes sanitizes attributes to ensure UTF8 validity. func sanitizeAttributes(attributes pcommon.Map) { attributes.Range(func(k string, v pcommon.Value) bool { + // Handle invalid UTF8 in attribute keys if !utf8.ValidString(k) { originalKey := k - k = invalidTagKey + attributes.PutStr(invalidTagKey, k) byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + originalKey) byteSlice.FromRaw([]byte(originalKey)) } else if v.Type() == pcommon.ValueTypeStr && !utf8.ValidString(v.Str()) { + // Handle invalid UTF8 in attribute values originalValue := v.Str() attributes.PutStr(k, invalidTagKey) byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + k) From 2c6fe5c2ddcd6d091a8dc46063317328a3db0e65 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 08:14:00 +0530 Subject: [PATCH 11/16] fix Signed-off-by: Vamshi Maskuri --- .../exporters/storageexporter/sanitizer/utf8_sanitizer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go index ff0c6fd3c23..1004b9b6f6a 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go @@ -40,7 +40,7 @@ func sanitizeUF8(traces ptrace.Traces) ptrace.Traces { byteSlice.FromRaw([]byte(originalName)) } - // Sanitize service name attribute + // Sanitize service name attribute attributes := span.Attributes() serviceNameAttr, ok := attributes.Get("service.name") if ok && !utf8.ValidString(serviceNameAttr.Str()) { From 7f0e9ccdee390ee939856bf7b0aff5271d4d1ad0 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Wed, 12 Jun 2024 08:36:59 +0530 Subject: [PATCH 12/16] Update cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go Co-authored-by: Yuri Shkuro Signed-off-by: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> --- .../internal/exporters/storageexporter/sanitizer/sanitizer.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go index 748c46a9965..5acc5f0a349 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/sanitizer.go @@ -7,8 +7,7 @@ import ( "go.opentelemetry.io/collector/pdata/ptrace" ) -// SanitizeSpan sanitizes/normalizes spans. Any business logic that needs to be applied to normalize the contents of a -// span should implement this interface. +// SanitizeTraces is a function that performs enrichment, clean-up, or normalization of trace data. type SanitizeTraces func(traces ptrace.Traces) ptrace.Traces // NewStandardSanitizers are automatically applied by SpanProcessor. From 1d46f5d8e06dd14faacd8a6a03bf0562a3b6404d Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 21:50:20 +0530 Subject: [PATCH 13/16] updated utf8 sanitizer Signed-off-by: Vamshi Maskuri --- .../sanitizer/utf8_sanitizer.go | 68 +++++++++++-------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go index 1004b9b6f6a..efda857e7a5 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go @@ -12,26 +12,35 @@ import ( const ( invalidOperation = "InvalidOperationName" - invalidService = "InvalidServiceName" invalidTagKey = "InvalidTagKey" badUTF8Prefix = "bad_utf8_" ) // NewUTF8Sanitizer creates a UTF8 sanitizer. func NewUTF8Sanitizer() SanitizeTraces { - return sanitizeUF8 + return sanitizeUTF8 } -// sanitizeUTF8 sanitizes the UTF8 in the spans. -func sanitizeUF8(traces ptrace.Traces) ptrace.Traces { +// sanitizeUTF8 sanitizes the UTF8 in the traces. +func sanitizeUTF8(traces ptrace.Traces) ptrace.Traces { resourceSpans := traces.ResourceSpans() for i := 0; i < resourceSpans.Len(); i++ { resourceSpan := resourceSpans.At(i) + + // Sanitize resource attributes + sanitizeAttributes(resourceSpan.Resource().Attributes()) + scopeSpans := resourceSpan.ScopeSpans() for j := 0; j < scopeSpans.Len(); j++ { - spans := scopeSpans.At(j).Spans() + scopeSpan := scopeSpans.At(j) + + // Sanitize scope attributes + sanitizeAttributes(scopeSpan.Scope().Attributes()) + + spans := scopeSpan.Spans() for k := 0; k < spans.Len(); k++ { span := spans.At(k) + // Sanitize operation name if !utf8.ValidString(span.Name()) { originalName := span.Name() @@ -40,17 +49,8 @@ func sanitizeUF8(traces ptrace.Traces) ptrace.Traces { byteSlice.FromRaw([]byte(originalName)) } - // Sanitize service name attribute - attributes := span.Attributes() - serviceNameAttr, ok := attributes.Get("service.name") - if ok && !utf8.ValidString(serviceNameAttr.Str()) { - originalServiceName := serviceNameAttr.Str() - attributes.PutStr("service.name", invalidService) - byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + "service.name") - byteSlice.FromRaw([]byte(originalServiceName)) - } - - sanitizeAttributes(attributes) + // Sanitize span attributes + sanitizeAttributes(span.Attributes()) } } } @@ -59,20 +59,34 @@ func sanitizeUF8(traces ptrace.Traces) ptrace.Traces { // sanitizeAttributes sanitizes attributes to ensure UTF8 validity. func sanitizeAttributes(attributes pcommon.Map) { + // Collect invalid keys and values during iteration + var invalidKeys []string + invalidValues := make(map[string]string) + attributes.Range(func(k string, v pcommon.Value) bool { - // Handle invalid UTF8 in attribute keys + // Handle invalid UTF-8 in attribute keys if !utf8.ValidString(k) { - originalKey := k - attributes.PutStr(invalidTagKey, k) - byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + originalKey) - byteSlice.FromRaw([]byte(originalKey)) - } else if v.Type() == pcommon.ValueTypeStr && !utf8.ValidString(v.Str()) { - // Handle invalid UTF8 in attribute values - originalValue := v.Str() - attributes.PutStr(k, invalidTagKey) - byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + k) - byteSlice.FromRaw([]byte(originalValue)) + invalidKeys = append(invalidKeys, k) + } + // Handle invalid UTF-8 in attribute values + if v.Type() == pcommon.ValueTypeStr && !utf8.ValidString(v.Str()) { + invalidValues[k] = v.Str() } return true }) + + // Apply collected changes after iteration + for _, k := range invalidKeys { + originalKey := k + attributes.PutStr(invalidTagKey, k) + byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + originalKey) + byteSlice.FromRaw([]byte(originalKey)) + } + + for k, v := range invalidValues { + originalValue := v + attributes.PutStr(k, invalidTagKey) + byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + k) + byteSlice.FromRaw([]byte(originalValue)) + } } From 3464d255abda5f001d09c03dc4256ae8bb51bf3a Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Wed, 12 Jun 2024 21:54:55 +0530 Subject: [PATCH 14/16] rename vars Co-authored-by: Yuri Shkuro Signed-off-by: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> --- .../storageexporter/sanitizer/empty_service_name_sanitizer.go | 2 +- .../exporters/storageexporter/sanitizer/utf8_sanitizer.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go index 7afc9486f99..46c6b3c965f 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go @@ -10,7 +10,7 @@ import ( // Constants for the replacement names const ( serviceNameReplacement = "empty-service-name" - nullProcessServiceName = "null-process-and-service-name" + missingServiceName = "missing-service-name" ) // NewEmptyServiceNameSanitizer returns a function that replaces empty service names diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go index efda857e7a5..3ee174b8a54 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go @@ -45,7 +45,7 @@ func sanitizeUTF8(traces ptrace.Traces) ptrace.Traces { if !utf8.ValidString(span.Name()) { originalName := span.Name() span.SetName(invalidOperation) - byteSlice := span.Attributes().PutEmptyBytes(badUTF8Prefix + "operation_name") + binaryAttr := span.Attributes().PutEmptyBytes(badUTF8Prefix + "operation_name") byteSlice.FromRaw([]byte(originalName)) } From 786a279e89461d38871d72171e7da4e3a3711915 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 21:58:24 +0530 Subject: [PATCH 15/16] fix Signed-off-by: Vamshi Maskuri --- .../sanitizer/empty_service_name_sanitizer.go | 4 ++-- .../storageexporter/sanitizer/utf8_sanitizer.go | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go index 46c6b3c965f..409a6f19e5d 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go @@ -10,7 +10,7 @@ import ( // Constants for the replacement names const ( serviceNameReplacement = "empty-service-name" - missingServiceName = "missing-service-name" + missingServiceName = "missing-service-name" ) // NewEmptyServiceNameSanitizer returns a function that replaces empty service names @@ -28,7 +28,7 @@ func sanitizeEmptyServiceName(traces ptrace.Traces) ptrace.Traces { serviceNameAttr, ok := attributes.Get("service.name") if !ok { // If service.name is missing, set it to nullProcessServiceName - attributes.PutStr("service.name", nullProcessServiceName) + attributes.PutStr("service.name", missingServiceName) } else if serviceNameAttr.Str() == "" { // If service.name is empty, replace it with serviceNameReplacement attributes.PutStr("service.name", serviceNameReplacement) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go index 3ee174b8a54..02f6aa0025f 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/utf8_sanitizer.go @@ -46,7 +46,7 @@ func sanitizeUTF8(traces ptrace.Traces) ptrace.Traces { originalName := span.Name() span.SetName(invalidOperation) binaryAttr := span.Attributes().PutEmptyBytes(badUTF8Prefix + "operation_name") - byteSlice.FromRaw([]byte(originalName)) + binaryAttr.FromRaw([]byte(originalName)) } // Sanitize span attributes @@ -79,14 +79,14 @@ func sanitizeAttributes(attributes pcommon.Map) { for _, k := range invalidKeys { originalKey := k attributes.PutStr(invalidTagKey, k) - byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + originalKey) - byteSlice.FromRaw([]byte(originalKey)) + binaryAttr := attributes.PutEmptyBytes(badUTF8Prefix + originalKey) + binaryAttr.FromRaw([]byte(originalKey)) } for k, v := range invalidValues { originalValue := v attributes.PutStr(k, invalidTagKey) - byteSlice := attributes.PutEmptyBytes(badUTF8Prefix + k) - byteSlice.FromRaw([]byte(originalValue)) + binaryAttr := attributes.PutEmptyBytes(badUTF8Prefix + k) + binaryAttr.FromRaw([]byte(originalValue)) } } From 28def9c148dcb2398c0913b9dc3e9c6fd9dcfff6 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri Date: Wed, 12 Jun 2024 22:04:01 +0530 Subject: [PATCH 16/16] fix vars Signed-off-by: Vamshi Maskuri --- .../sanitizer/empty_service_name_sanitizer.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go index 409a6f19e5d..556e5ccb5bc 100644 --- a/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go +++ b/cmd/jaeger/internal/exporters/storageexporter/sanitizer/empty_service_name_sanitizer.go @@ -9,8 +9,8 @@ import ( // Constants for the replacement names const ( - serviceNameReplacement = "empty-service-name" - missingServiceName = "missing-service-name" + emptyServiceName = "empty-service-name" + missingServiceName = "missing-service-name" ) // NewEmptyServiceNameSanitizer returns a function that replaces empty service names @@ -31,7 +31,7 @@ func sanitizeEmptyServiceName(traces ptrace.Traces) ptrace.Traces { attributes.PutStr("service.name", missingServiceName) } else if serviceNameAttr.Str() == "" { // If service.name is empty, replace it with serviceNameReplacement - attributes.PutStr("service.name", serviceNameReplacement) + attributes.PutStr("service.name", emptyServiceName) } } return traces