Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[exporter/elasticsearch] Add sanitization utils for datastream fields #35494

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f01217c
add sanitization utils for datastream fields
rubvs Sep 30, 2024
f30ab50
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Sep 30, 2024
e973b09
test: add sanitization check for exportering datastream
rubvs Sep 30, 2024
fe77f13
Merge branch 'elastic-sanitize-datastream-fields' of github.com:rubvs…
rubvs Sep 30, 2024
bf248b3
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Sep 30, 2024
4383f67
improve sanitization func to leverage inlining
rubvs Oct 1, 2024
8bbbc8c
Merge branch 'elastic-sanitize-datastream-fields' of github.com:rubvs…
rubvs Oct 1, 2024
bc3bd6b
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Oct 1, 2024
86cc2ae
minor change to datastream sanitization func
rubvs Oct 1, 2024
d69cd4a
Merge branch 'elastic-sanitize-datastream-fields' of github.com:rubvs…
rubvs Oct 1, 2024
188300c
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Oct 1, 2024
71ffaca
add changelog
rubvs Oct 1, 2024
5d771ce
Merge branch 'elastic-sanitize-datastream-fields' of github.com:rubvs…
rubvs Oct 1, 2024
43e55d2
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Oct 1, 2024
9e3564e
fix linting issue in comment
rubvs Oct 1, 2024
8bdea23
Merge branch 'elastic-sanitize-datastream-fields' of github.com:rubvs…
rubvs Oct 1, 2024
4020fc7
minor changes
rubvs Oct 3, 2024
f85b3a5
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Oct 3, 2024
a868f0a
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Oct 3, 2024
d89ed94
doc: add reference to data stream field restrictions
rubvs Oct 8, 2024
4bafdcf
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Oct 11, 2024
ce54d20
Merge branch 'main' into elastic-sanitize-datastream-fields
rubvs Oct 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion exporter/elasticsearchexporter/data_stream_router.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ func routeWithDefaults(defaultDSType string) func(
}
}

// Receiver-based routing
dataset = sanitizeDataStreamDataset(dataset)
namespace = sanitizeDataStreamNamespace(namespace)
rubvs marked this conversation as resolved.
Show resolved Hide resolved

// Receiver-based routing
// For example, hostmetricsreceiver (or hostmetricsreceiver.otel in the OTel output mode)
rubvs marked this conversation as resolved.
Show resolved Hide resolved
// for the scope name
// github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper
Expand Down
10 changes: 7 additions & 3 deletions exporter/elasticsearchexporter/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"math"
"net/http"
"runtime"
"strings"
"sync"
"sync/atomic"
"testing"
Expand Down Expand Up @@ -216,7 +217,10 @@ func TestExporterLogs(t *testing.T) {
server := newESTestServer(t, func(docs []itemRequest) ([]itemResponse, error) {
rec.Record(docs)

assert.Equal(t, "logs-record.dataset-resource.namespace", actionJSONToIndex(t, docs[0].Action))
ds := "record.dataset" + strings.Repeat("_", len(disallowedDatasetRunes))
rubvs marked this conversation as resolved.
Show resolved Hide resolved
ns := "resource.namespace" + strings.Repeat("_", len(disallowedNamespaceRunes))

assert.Equal(t, fmt.Sprintf("logs-%s-%s", ds, ns), actionJSONToIndex(t, docs[0].Action))
rubvs marked this conversation as resolved.
Show resolved Hide resolved

return itemsAllOK(docs)
})
Expand All @@ -226,12 +230,12 @@ func TestExporterLogs(t *testing.T) {
})
logs := newLogsWithAttributes(
map[string]any{
dataStreamDataset: "record.dataset",
dataStreamDataset: "record.dataset" + disallowedDatasetRunes,
rubvs marked this conversation as resolved.
Show resolved Hide resolved
},
nil,
map[string]any{
dataStreamDataset: "resource.dataset",
dataStreamNamespace: "resource.namespace",
dataStreamNamespace: "resource.namespace" + disallowedNamespaceRunes,
rubvs marked this conversation as resolved.
Show resolved Hide resolved
},
)
logs.ResourceLogs().At(0).ScopeLogs().At(0).LogRecords().At(0).Body().SetStr("hello world")
Expand Down
17 changes: 17 additions & 0 deletions exporter/elasticsearchexporter/model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,9 @@ func decodeOTelID(data []byte) ([]byte, error) {
}

func TestEncodeLogOtelMode(t *testing.T) {
randomString := strings.Repeat("abcdefghijklmnopqrstuvwxyz0123456789", 10)
maxLenNamespace := maxDataStreamBytes - len(disallowedNamespaceRunes)
maxLenDataset := maxDataStreamBytes - len(disallowedDatasetRunes)

tests := []struct {
name string
Expand Down Expand Up @@ -1044,6 +1047,20 @@ func TestEncodeLogOtelMode(t *testing.T) {
return assignDatastreamData(or, "", "third.otel")
},
},
{
name: "sanitize dataset/namespace",
rec: buildOTelRecordTestData(t, func(or OTelRecord) OTelRecord {
or.Attributes["data_stream.dataset"] = disallowedDatasetRunes + randomString
or.Attributes["data_stream.namespace"] = disallowedNamespaceRunes + randomString
return or
}),
wantFn: func(or OTelRecord) OTelRecord {
deleteDatasetAttributes(or)
ds := strings.Repeat("_", len(disallowedDatasetRunes)) + randomString[:maxLenDataset] + ".otel"
ns := strings.Repeat("_", len(disallowedNamespaceRunes)) + randomString[:maxLenNamespace]
return assignDatastreamData(or, "", ds, ns)
},
},
}

m := encodeModel{
Expand Down
38 changes: 38 additions & 0 deletions exporter/elasticsearchexporter/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,49 @@ package elasticsearchexporter // import "github.com/open-telemetry/opentelemetry
import (
	"bytes"
	"fmt"
	"strings"
	"time"
	"unicode"
	"unicode/utf8"

	"github.com/lestrrat-go/strftime"
)

// Limits and reserved characters used when sanitizing data stream fields.
const (
	// maxDataStreamBytes caps the byte length of a sanitized dataset or namespace.
	maxDataStreamBytes = 100
	// disallowedNamespaceRunes lists the runes that are replaced with '_' in a
	// data stream namespace.
	disallowedNamespaceRunes = `\/*?"<>| ,#:`
	// disallowedDatasetRunes is the namespace set with '-' added in front.
	disallowedDatasetRunes = "-" + disallowedNamespaceRunes
)

// sanitizeDataStreamDataset normalizes a data stream dataset value. Every rune
// listed in disallowedDatasetRunes is replaced with '_', every other rune is
// lower-cased, and the result is limited to maxDataStreamBytes bytes.
//
// The limit is applied on a rune boundary: when the byte cap would land inside
// a multi-byte UTF-8 sequence, that rune is dropped entirely, so the returned
// string can be slightly shorter than maxDataStreamBytes.
//
// Restrictions are outlined in
// https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html
func sanitizeDataStreamDataset(field string) string {
	field = strings.Map(replaceReservedRune(disallowedDatasetRunes), field)
	if len(field) <= maxDataStreamBytes {
		return field
	}

	// field[end] is the first byte that would be cut off. If it is a
	// continuation byte, the cut is mid-rune; back up to the rune's start so
	// the truncated value never ends in a partial UTF-8 sequence.
	end := maxDataStreamBytes
	for end > 0 && !utf8.RuneStart(field[end]) {
		end--
	}
	return field[:end]
}

// sanitizeDataStreamNamespace normalizes a data stream namespace value. Every
// rune listed in disallowedNamespaceRunes is replaced with '_', every other
// rune is lower-cased, and the result is limited to maxDataStreamBytes bytes.
//
// The limit is applied on a rune boundary: when the byte cap would land inside
// a multi-byte UTF-8 sequence, that rune is dropped entirely, so the returned
// string can be slightly shorter than maxDataStreamBytes.
//
// Restrictions are outlined in
// https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html
func sanitizeDataStreamNamespace(field string) string {
	field = strings.Map(replaceReservedRune(disallowedNamespaceRunes), field)
	if len(field) <= maxDataStreamBytes {
		return field
	}

	// field[end] is the first byte that would be cut off. If it is a
	// continuation byte, the cut is mid-rune; back up to the rune's start so
	// the truncated value never ends in a partial UTF-8 sequence.
	end := maxDataStreamBytes
	for end > 0 && !utf8.RuneStart(field[end]) {
		end--
	}
	return field[:end]
}
rubvs marked this conversation as resolved.
Show resolved Hide resolved

// replaceReservedRune builds a rune-mapping function suitable for strings.Map.
// The returned function yields '_' for any rune contained in disallowedRunes
// and the lower-case form of every other rune.
func replaceReservedRune(disallowedRunes string) func(r rune) rune {
	mapper := func(c rune) rune {
		if !strings.ContainsRune(disallowedRunes, c) {
			return unicode.ToLower(c)
		}
		return '_'
	}
	return mapper
}

rubvs marked this conversation as resolved.
Show resolved Hide resolved
func generateIndexWithLogstashFormat(index string, conf *LogstashFormatSettings, t time.Time) (string, error) {
if conf.Enabled {
partIndex := fmt.Sprintf("%s%s", index, conf.PrefixSeparator)
Expand Down