Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hackathon Pyrobench: Test previous optimization https://github.com/prometheus/prometheus/pull/13843 #36

Draft
wants to merge 2 commits into
base: 20240807_retest-perf-b-before
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/pyrobench.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: Pyrobench

Check failure

Code scanning / Scorecard

Token-Permissions High

score is 0: no topLevel permission defined
Remediation tip: Visit https://app.stepsecurity.io/secureworkflow.
Tick the 'Restrict permissions for GITHUB_TOKEN'
Untick other options
NOTE: If you want to resolve multiple issues at once, you can visit https://app.stepsecurity.io/securerepo instead.
Click Remediation section below for further remediation help
on: [pull_request]
jobs:
test:
name: Bench
runs-on: ubuntu-latest

steps:
- name: Set up Go 1.22
uses: actions/setup-go@v5

Check warning

Code scanning / Scorecard

Pinned-Dependencies Medium

score is 6: GitHub-owned GitHubAction not pinned by hash
Click Remediation section below to solve this issue
with:
go-version: 1.22
id: go

- name: Check out code into the Go module directory
uses: actions/checkout@v4

Check warning

Code scanning / Scorecard

Pinned-Dependencies Medium

score is 6: GitHub-owned GitHubAction not pinned by hash
Click Remediation section below to solve this issue
with:
fetch-depth: 1024

- run: |
# Ensure base branch is fetched
git fetch origin "${GITHUB_BASE_REF}" --depth 1024
echo "GITHUB_BASE_REF=${GITHUB_BASE_REF}"

- name: Run Benchmark
run: |
go run github.com/grafana/pyrobench@26f65feff9ff5ad092fb21b6931be8b730554a86 -v compare --git-base origin/$GITHUB_BASE_REF --github-commenter
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
129 changes: 102 additions & 27 deletions model/labels/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const (
maxSetMatches = 256

// The minimum number of alternate values a regex should have to trigger
// the optimization done by optimizeEqualStringMatchers() and so use a map
// the optimization done by optimizeEqualOrPrefixStringMatchers() and so use a map
// to match values instead of iterating over a list. This value has
// been computed running BenchmarkOptimizeEqualStringMatchers.
minEqualMultiStringMatcherMapThreshold = 16
Expand Down Expand Up @@ -337,7 +337,7 @@ func optimizeAlternatingLiterals(s string) (StringMatcher, []string) {
return nil, nil
}

multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates)
multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates, 0, 0)

for end := strings.IndexByte(s, '|'); end > -1; end = strings.IndexByte(s, '|') {
// Split the string into the next literal and the remainder
Expand Down Expand Up @@ -412,7 +412,7 @@ func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher {
clearBeginEndText(re)

m := stringMatcherFromRegexpInternal(re)
m = optimizeEqualStringMatchers(m, minEqualMultiStringMatcherMapThreshold)
m = optimizeEqualOrPrefixStringMatchers(m, minEqualMultiStringMatcherMapThreshold)

return m
}
Expand Down Expand Up @@ -732,17 +732,20 @@ func (m *equalStringMatcher) Matches(s string) bool {
type multiStringMatcherBuilder interface {
StringMatcher
add(s string)
addPrefix(prefix string, prefixCaseSensitive bool, matcher StringMatcher)
setMatches() []string
}

func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize int) multiStringMatcherBuilder {
func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize, estimatedPrefixes, minPrefixLength int) multiStringMatcherBuilder {
// If the estimated size is low enough, it's faster to use a slice instead of a map.
if estimatedSize < minEqualMultiStringMatcherMapThreshold {
if estimatedSize < minEqualMultiStringMatcherMapThreshold && estimatedPrefixes == 0 {
return &equalMultiStringSliceMatcher{caseSensitive: caseSensitive, values: make([]string, 0, estimatedSize)}
}

return &equalMultiStringMapMatcher{
values: make(map[string]struct{}, estimatedSize),
prefixes: make(map[string][]StringMatcher, estimatedPrefixes),
minPrefixLen: minPrefixLength,
caseSensitive: caseSensitive,
}
}
Expand All @@ -758,6 +761,10 @@ func (m *equalMultiStringSliceMatcher) add(s string) {
m.values = append(m.values, s)
}

func (m *equalMultiStringSliceMatcher) addPrefix(_ string, _ bool, _ StringMatcher) {
panic("not implemented")
}

func (m *equalMultiStringSliceMatcher) setMatches() []string {
return m.values
}
Expand All @@ -779,12 +786,17 @@ func (m *equalMultiStringSliceMatcher) Matches(s string) bool {
return false
}

// equalMultiStringMapMatcher matches a string exactly against a map of valid values.
// equalMultiStringMapMatcher matches a string exactly against a map of valid values
// or against a set of prefix matchers.
type equalMultiStringMapMatcher struct {
// values contains values to match a string against. If the matching is case insensitive,
// the values here must be lowercase.
values map[string]struct{}

// prefixes maps strings, all of length minPrefixLen, to sets of matchers to check the rest of the string.
// If the matching is case insensitive, prefixes are all lowercase.
prefixes map[string][]StringMatcher
// minPrefixLen can be zero, meaning there are no prefix matchers.
minPrefixLen int
caseSensitive bool
}

Expand All @@ -796,8 +808,27 @@ func (m *equalMultiStringMapMatcher) add(s string) {
m.values[s] = struct{}{}
}

func (m *equalMultiStringMapMatcher) addPrefix(prefix string, prefixCaseSensitive bool, matcher StringMatcher) {
if m.minPrefixLen == 0 {
panic("addPrefix called when no prefix length defined")
}
if len(prefix) < m.minPrefixLen {
panic("addPrefix called with a too short prefix")
}
if m.caseSensitive != prefixCaseSensitive {
panic("addPrefix called with a prefix whose case sensitivity is different than the expected one")
}

s := prefix[:m.minPrefixLen]
if !m.caseSensitive {
s = strings.ToLower(s)
}

m.prefixes[s] = append(m.prefixes[s], matcher)
}

func (m *equalMultiStringMapMatcher) setMatches() []string {
if len(m.values) >= maxSetMatches {
if len(m.values) >= maxSetMatches || len(m.prefixes) > 0 {
return nil
}

Expand All @@ -813,8 +844,17 @@ func (m *equalMultiStringMapMatcher) Matches(s string) bool {
s = toNormalisedLower(s)
}

_, ok := m.values[s]
return ok
if _, ok := m.values[s]; ok {
return true
}
if m.minPrefixLen > 0 && len(s) >= m.minPrefixLen {
for _, matcher := range m.prefixes[s[:m.minPrefixLen]] {
if matcher.Matches(s) {
return true
}
}
}
return false
}

// toNormalisedLower normalise the input string using "Unicode Normalization Form D" and then convert
Expand Down Expand Up @@ -897,20 +937,24 @@ func (m trueMatcher) Matches(_ string) bool {
return true
}

// optimizeEqualStringMatchers optimize a specific case where all matchers are made by an
// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher). In
// this specific case, when we have many strings to match against we can use a map instead
// optimizeEqualOrPrefixStringMatchers optimize a specific case where all matchers are made by an
// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher) or
// with a literal prefix (literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher).
//
// In this specific case, when we have many strings to match against we can use a map instead
// of iterating over the list of strings.
func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatcher {
func optimizeEqualOrPrefixStringMatchers(input StringMatcher, threshold int) StringMatcher {
var (
caseSensitive bool
caseSensitiveSet bool
numValues int
numPrefixes int
minPrefixLength int
)

// Analyse the input StringMatcher to count the number of occurrences
// and ensure all of them have the same case sensitivity.
analyseCallback := func(matcher *equalStringMatcher) bool {
analyseEqualMatcherCallback := func(matcher *equalStringMatcher) bool {
// Ensure we don't have mixed case sensitivity.
if caseSensitiveSet && caseSensitive != matcher.caseSensitive {
return false
Expand All @@ -923,34 +967,55 @@ func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatch
return true
}

if !findEqualStringMatchers(input, analyseCallback) {
analysePrefixMatcherCallback := func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool {
// Ensure we don't have mixed case sensitivity.
if caseSensitiveSet && caseSensitive != prefixCaseSensitive {
return false
} else if !caseSensitiveSet {
caseSensitive = prefixCaseSensitive
caseSensitiveSet = true
}
if numPrefixes == 0 || len(prefix) < minPrefixLength {
minPrefixLength = len(prefix)
}

numPrefixes++
return true
}

if !findEqualOrPrefixStringMatchers(input, analyseEqualMatcherCallback, analysePrefixMatcherCallback) {
return input
}

// If the number of values found is less than the threshold, then we should skip the optimization.
if numValues < threshold {
// If the number of values and prefixes found is less than the threshold, then we should skip the optimization.
if (numValues + numPrefixes) < threshold {
return input
}

// Parse again the input StringMatcher to extract all values and storing them.
// We can skip the case sensitivity check because we've already checked it and
// if the code reach this point then it means all matchers have the same case sensitivity.
multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues)
multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues, numPrefixes, minPrefixLength)

// Ignore the return value because we already iterated over the input StringMatcher
// and it was all good.
findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool {
findEqualOrPrefixStringMatchers(input, func(matcher *equalStringMatcher) bool {
multiMatcher.add(matcher.s)
return true
}, func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool {
multiMatcher.addPrefix(prefix, caseSensitive, matcher)
return true
})

return multiMatcher
}

// findEqualStringMatchers analyze the input StringMatcher and calls the callback for each
// equalStringMatcher found. Returns true if and only if the input StringMatcher is *only*
// composed by an alternation of equalStringMatcher.
func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalStringMatcher) bool) bool {
// findEqualOrPrefixStringMatchers analyze the input StringMatcher and calls the equalMatcherCallback for each
// equalStringMatcher found, and prefixMatcherCallback for each literalPrefixSensitiveStringMatcher and literalPrefixInsensitiveStringMatcher found.
//
// Returns true if and only if the input StringMatcher is *only* composed by an alternation of equalStringMatcher and/or
// literal prefix matcher. Returns false if prefixMatcherCallback is nil and a literal prefix matcher is encountered.
func findEqualOrPrefixStringMatchers(input StringMatcher, equalMatcherCallback func(matcher *equalStringMatcher) bool, prefixMatcherCallback func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool) bool {
orInput, ok := input.(orStringMatcher)
if !ok {
return false
Expand All @@ -959,17 +1024,27 @@ func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalSt
for _, m := range orInput {
switch casted := m.(type) {
case orStringMatcher:
if !findEqualStringMatchers(m, callback) {
if !findEqualOrPrefixStringMatchers(m, equalMatcherCallback, prefixMatcherCallback) {
return false
}

case *equalStringMatcher:
if !callback(casted) {
if !equalMatcherCallback(casted) {
return false
}

case *literalPrefixSensitiveStringMatcher:
if prefixMatcherCallback == nil || !prefixMatcherCallback(casted.prefix, true, casted) {
return false
}

case *literalPrefixInsensitiveStringMatcher:
if prefixMatcherCallback == nil || !prefixMatcherCallback(casted.prefix, false, casted) {
return false
}

default:
// It's not an equal string matcher, so we have to stop searching
// It's not an equal or prefix string matcher, so we have to stop searching
// cause this optimization can't be applied.
return false
}
Expand Down
Loading
Loading