From 348508b823cc10485609a3ed0dec6a8300f8aec3 Mon Sep 17 00:00:00 2001 From: Joe Elliott Date: Tue, 14 Nov 2023 09:31:10 -0500 Subject: [PATCH] TraceQL: Improve regexp perf (#3139) * improve regexp perf Signed-off-by: Joe Elliott * changelog Signed-off-by: Joe Elliott * added invalid regex check Signed-off-by: Joe Elliott --------- Signed-off-by: Joe Elliott --- CHANGELOG.md | 1 + pkg/traceql/ast.go | 11 +++++++---- pkg/traceql/ast_conditions.go | 2 +- pkg/traceql/ast_execute.go | 18 +++++++++++++++--- pkg/traceql/ast_stringer.go | 2 +- pkg/traceql/ast_validate.go | 15 +++++++++++++-- pkg/traceql/test_examples.yaml | 3 +++ 7 files changed, 41 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e92a85b2ed..4e90bc11492 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * [FEATURE] Add per-tenant compaction window [#3129](https://github.com/grafana/tempo/pull/3129) (@zalegrala) * [BUGFIX] Include statusMessage intrinsic attribute in tag search. [#3084](https://github.com/grafana/tempo/pull/3084) (@rcrowe) * [ENHANCEMENT] Update poller to make use of previous results and reduce backend load. [#2652](https://github.com/grafana/tempo/pull/2652) (@zalegrala) +* [ENHANCEMENT] Improve TraceQL regex performance in certain queries. [#3139](https://github.com/grafana/tempo/pull/3139) (@joe-elliott) ## v2.3.0 / 2023-10-30 diff --git a/pkg/traceql/ast.go b/pkg/traceql/ast.go index ced59a785d0..b1343d67aa9 100644 --- a/pkg/traceql/ast.go +++ b/pkg/traceql/ast.go @@ -3,6 +3,7 @@ package traceql import ( "fmt" "math" + "regexp" "time" ) @@ -368,10 +369,12 @@ type BinaryOperation struct { Op Operator LHS FieldExpression RHS FieldExpression + + compiledExpression *regexp.Regexp } -func newBinaryOperation(op Operator, lhs FieldExpression, rhs FieldExpression) BinaryOperation { - return BinaryOperation{ +func newBinaryOperation(op Operator, lhs FieldExpression, rhs FieldExpression) *BinaryOperation { + return &BinaryOperation{ Op: op, LHS: lhs, RHS: rhs, @@ -381,7 +384,7 @@ func newBinaryOperation(op Operator, lhs FieldExpression, rhs FieldExpression) B // nolint: revive func (BinaryOperation) __fieldExpression() {} -func (o BinaryOperation) impliedType() StaticType { +func (o *BinaryOperation) impliedType() StaticType { if o.Op.isBoolean() { return TypeBoolean } @@ -396,7 +399,7 @@ func (o BinaryOperation) impliedType() StaticType { return o.RHS.impliedType() } -func (o BinaryOperation) referencesSpan() bool { +func (o *BinaryOperation) referencesSpan() bool { return o.LHS.referencesSpan() || o.RHS.referencesSpan() } diff --git a/pkg/traceql/ast_conditions.go b/pkg/traceql/ast_conditions.go index cb3c325cbdd..d723ae1deed 100644 --- a/pkg/traceql/ast_conditions.go +++ b/pkg/traceql/ast_conditions.go @@ -31,7 +31,7 @@ func (o SelectOperation) extractConditions(request *FetchSpansRequest) { request.SecondPassConditions = append(request.SecondPassConditions, selectR.Conditions...) } -func (o BinaryOperation) extractConditions(request *FetchSpansRequest) { +func (o *BinaryOperation) extractConditions(request *FetchSpansRequest) { // TODO we can further optimise this by attempting to execute every FieldExpression, if they only contain statics it should resolve switch o.LHS.(type) { case Attribute: diff --git a/pkg/traceql/ast_execute.go b/pkg/traceql/ast_execute.go index 4c0a36a2bbf..ae786ff89a3 100644 --- a/pkg/traceql/ast_execute.go +++ b/pkg/traceql/ast_execute.go @@ -319,7 +319,7 @@ func (a Aggregate) evaluate(input []*Spanset) (output []*Spanset, err error) { return output, nil } -func (o BinaryOperation) execute(span Span) (Static, error) { +func (o *BinaryOperation) execute(span Span) (Static, error) { lhs, err := o.LHS.execute(span) if err != nil { return NewStaticNil(), err @@ -381,10 +381,22 @@ func (o BinaryOperation) execute(span Span) (Static, error) { case OpNotEqual: return NewStaticBool(!lhs.Equals(rhs)), nil case OpRegex: - matched, err := regexp.MatchString(rhs.S, lhs.S) + if o.compiledExpression == nil { + o.compiledExpression, err = regexp.Compile(rhs.S) + if err != nil { + return NewStaticNil(), err + } + } + matched := o.compiledExpression.MatchString(lhs.S) return NewStaticBool(matched), err case OpNotRegex: - matched, err := regexp.MatchString(rhs.S, lhs.S) + if o.compiledExpression == nil { + o.compiledExpression, err = regexp.Compile(rhs.S) + if err != nil { + return NewStaticNil(), err + } + } + matched := o.compiledExpression.MatchString(lhs.S) return NewStaticBool(!matched), err case OpAnd: return NewStaticBool(lhs.B && rhs.B), nil diff --git a/pkg/traceql/ast_stringer.go b/pkg/traceql/ast_stringer.go index b18c9be25ac..d908594c2ca 100644 --- a/pkg/traceql/ast_stringer.go +++ b/pkg/traceql/ast_stringer.go @@ -58,7 +58,7 @@ func (f ScalarFilter) String() string { return binaryOp(f.op, f.lhs, f.rhs) } -func (o BinaryOperation) String() string { +func (o *BinaryOperation) String() string { return binaryOp(o.Op, o.LHS, o.RHS) } diff --git a/pkg/traceql/ast_validate.go b/pkg/traceql/ast_validate.go index fb18ecc97b5..aeb9d4eff8a 100644 --- a/pkg/traceql/ast_validate.go +++ b/pkg/traceql/ast_validate.go @@ -1,6 +1,9 @@ package traceql -import "fmt" +import ( + "fmt" + "regexp" +) // unsupportedError is returned for traceql features that are not yet supported. type unsupportedError struct { @@ -156,7 +159,7 @@ func (f ScalarFilter) validate() error { return nil } -func (o BinaryOperation) validate() error { +func (o *BinaryOperation) validate() error { if err := o.LHS.validate(); err != nil { return err } @@ -179,6 +182,14 @@ func (o BinaryOperation) validate() error { return fmt.Errorf("illegal operation for the given types: %s", o.String()) } + // if this is a regex operator confirm the RHS is a valid regex + if o.Op == OpRegex || o.Op == OpNotRegex { + _, err := regexp.Compile(o.RHS.String()) + if err != nil { + return fmt.Errorf("invalid regex: %s", o.RHS.String()) + } + } + // this condition may not be possible to hit since it's not parseable. // however, if we did somehow end up this situation, it would be good to return // a reasonable error diff --git a/pkg/traceql/test_examples.yaml b/pkg/traceql/test_examples.yaml index a71c5c80dab..6949f1b3aa8 100644 --- a/pkg/traceql/test_examples.yaml +++ b/pkg/traceql/test_examples.yaml @@ -205,6 +205,9 @@ validate_fails: - '{ traceDuration > "test" }' - '{ rootServiceName = 1 }' - '{ rootName != 3.2 }' + # invalid regexes + - '{ span.foo =~ "[" }' + - '{ span.foo !~ "[" }' # unary operators - incorrect types - '{ -true }' - '{ -"foo" = "bar" }'