Skip to content

Commit

Permalink
providers/linux: optimize parseKeyValue (#186)
Browse files Browse the repository at this point in the history
This optimizes the `parseKeyValue()` function in the linux provider. It reduces
the CPU cost and completely eliminates memory allocations.

```
                 │  before.txt   │              after.txt              │
                 │    sec/op     │    sec/op     vs base               │
ParseKeyValue-10   3540.0n ± ∞ ¹   972.6n ± ∞ ¹  -72.53% (p=0.008 n=5)

                 │     B/op      │     B/op       vs base                │
ParseKeyValue-10   6.672Ki ± ∞ ¹   0.000Ki ± ∞ ¹  -100.00% (p=0.008 n=5)

                 │  allocs/op  │ allocs/op   vs base                │
ParseKeyValue-10   58.00 ± ∞ ¹   0.00 ± ∞ ¹  -100.00% (p=0.008 n=5)
```

---------

Co-authored-by: Andrew Kroh <[email protected]>
Co-authored-by: Dan Kortschak <[email protected]>
  • Loading branch information
3 people authored Aug 16, 2023
1 parent e4ac65c commit 865a715
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 13 deletions.
3 changes: 3 additions & 0 deletions .changelog/186.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
linux: optimize linux key-value parsing (i.e. /proc files)
```
2 changes: 1 addition & 1 deletion providers/linux/capabilities_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func capabilityName(num int) string {
func readCapabilities(content []byte) (*types.CapabilityInfo, error) {
var cap types.CapabilityInfo

err := parseKeyValue(content, ":", func(key, value []byte) error {
err := parseKeyValue(content, ':', func(key, value []byte) error {
var err error
switch string(key) {
case "CapInh":
Expand Down
2 changes: 1 addition & 1 deletion providers/linux/memory_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func parseMemInfo(content []byte) (*types.HostMemoryInfo, error) {
}

hasAvailable := false
err := parseKeyValue(content, ":", func(key, value []byte) error {
err := parseKeyValue(content, ':', func(key, value []byte) error {
num, err := parseBytesOrNumber(value)
if err != nil {
return fmt.Errorf("failed to parse %v value of %v: %w", string(key), string(value), err)
Expand Down
2 changes: 1 addition & 1 deletion providers/linux/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func (p *process) User() (types.UserInfo, error) {
}

var user types.UserInfo
err = parseKeyValue(content, ":", func(key, value []byte) error {
err = parseKeyValue(content, ':', func(key, value []byte) error {
// See proc(5) for the format of /proc/[pid]/status
switch string(key) {
case "Uid":
Expand Down
2 changes: 1 addition & 1 deletion providers/linux/seccomp_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func (m SeccompMode) String() string {
func readSeccompFields(content []byte) (*types.SeccompInfo, error) {
var seccomp types.SeccompInfo

err := parseKeyValue(content, ":", func(key, value []byte) error {
err := parseKeyValue(content, ':', func(key, value []byte) error {
switch string(key) {
case "Seccomp":
mode, err := strconv.ParseUint(string(value), 10, 8)
Expand Down
24 changes: 16 additions & 8 deletions providers/linux/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,28 @@ import (
"strconv"
)

// parseKeyValue parses key/val pairs separated by the provided separator from
// each line in content and invokes the callback once per pair. White-space is
// trimmed from val; key is passed through untouched. Empty lines are ignored.
// All non-empty lines must contain the separator otherwise an error is
// returned.
//
// If callback returns a non-nil error, parsing stops immediately and that
// error is returned unchanged to the caller.
//
// The key and value slices handed to callback are sub-slices of content (no
// copy is made), so a callback that needs to keep them must copy them.
func parseKeyValue(content []byte, separator byte, callback func(key, value []byte) error) error {
	var line []byte

	for len(content) > 0 {
		// The "found" result is irrelevant: when no newline remains, the rest
		// of content is the last line and content becomes empty.
		line, content, _ = bytes.Cut(content, []byte{'\n'})
		if len(line) == 0 {
			continue
		}

		key, value, ok := bytes.Cut(line, []byte{separator})
		if !ok {
			return fmt.Errorf("separator %q not found", separator)
		}

		// Propagate callback failures; dropping them would hide value-parsing
		// errors reported by callers.
		if err := callback(key, bytes.TrimSpace(value)); err != nil {
			return err
		}
	}

	return nil
}

func findValue(filename, separator, key string) (string, error) {
Expand Down
162 changes: 162 additions & 0 deletions providers/linux/util_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package linux

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestParseKeyValueNoEOL checks that every pair, including the last one, is
// reported when content does not end with a newline.
func TestParseKeyValueNoEOL(t *testing.T) {
	vals := [][2]string{}
	// Tabs are spelled as \t escapes so the input visibly matches the expected
	// values below and cannot be altered by whitespace-normalizing tools.
	err := parseKeyValue([]byte(
		"Name:\tzsh\nUmask:\t0022\nState:\tS (sleeping)\nUid:\t1000\t1000\t1000\t1000",
	), ':', func(key, value []byte) error {
		vals = append(vals, [2]string{string(key), string(value)})
		return nil
	})
	assert.NoError(t, err)

	assert.Equal(t, [][2]string{
		{"Name", "zsh"},
		{"Umask", "0022"},
		{"State", "S (sleeping)"},
		{"Uid", "1000\t1000\t1000\t1000"},
	}, vals)
}

// TestParseKeyValueEmptyLine checks that an empty line in the middle of
// content is skipped without producing a pair or an error.
func TestParseKeyValueEmptyLine(t *testing.T) {
	vals := [][2]string{}
	// Tabs are spelled as \t escapes so the input visibly matches the expected
	// values below and cannot be altered by whitespace-normalizing tools.
	err := parseKeyValue([]byte(
		"Name:\tzsh\nUmask:\t0022\nState:\tS (sleeping)\n\nUid:\t1000\t1000\t1000\t1000",
	), ':', func(key, value []byte) error {
		vals = append(vals, [2]string{string(key), string(value)})
		return nil
	})
	assert.NoError(t, err)

	assert.Equal(t, [][2]string{
		{"Name", "zsh"},
		{"Umask", "0022"},
		{"State", "S (sleeping)"},
		{"Uid", "1000\t1000\t1000\t1000"},
	}, vals)
}

// TestParseKeyValueEOL checks that a trailing newline at the end of content
// does not produce an extra pair or an error.
func TestParseKeyValueEOL(t *testing.T) {
	vals := [][2]string{}
	// Tabs are spelled as \t escapes so the input visibly matches the expected
	// values below and cannot be altered by whitespace-normalizing tools.
	err := parseKeyValue([]byte(
		"Name:\tzsh\nUmask:\t0022\nState:\tS (sleeping)\nUid:\t1000\t1000\t1000\t1000\n",
	), ':', func(key, value []byte) error {
		vals = append(vals, [2]string{string(key), string(value)})
		return nil
	})
	assert.NoError(t, err)

	assert.Equal(t, [][2]string{
		{"Name", "zsh"},
		{"Umask", "0022"},
		{"State", "S (sleeping)"},
		{"Uid", "1000\t1000\t1000\t1000"},
	}, vals)
}

// testProcStatus is a sample process status file, captured with
// `cat /proc/$$/status`. It is the input for BenchmarkParseKeyValue: one
// "Key: value" pair per line, terminated by a trailing newline.
var testProcStatus = []byte(`Name: zsh
Umask: 0022
State: S (sleeping)
Tgid: 4023363
Ngid: 0
Pid: 4023363
PPid: 4023357
TracerPid: 0
Uid: 1000 1000 1000 1000
Gid: 1000 1000 1000 1000
FDSize: 64
Groups: 24 25 27 29 30 44 46 102 109 112 116 119 131 998 1000
NStgid: 4023363
NSpid: 4023363
NSpgid: 4023363
NSsid: 4023363
VmPeak: 15596 kB
VmSize: 15144 kB
VmLck: 0 kB
VmPin: 0 kB
VmHWM: 9060 kB
VmRSS: 8716 kB
RssAnon: 3828 kB
RssFile: 4888 kB
RssShmem: 0 kB
VmData: 3500 kB
VmStk: 328 kB
VmExe: 600 kB
VmLib: 2676 kB
VmPTE: 68 kB
VmSwap: 0 kB
HugetlbPages: 0 kB
CoreDumping: 0
THP_enabled: 1
Threads: 1
SigQ: 0/126683
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000002
SigIgn: 0000000000384000
SigCgt: 0000000008013003
CapInh: 0000000000000000
CapPrm: 0000000000000000
CapEff: 0000000000000000
CapBnd: 000001ffffffffff
CapAmb: 0000000000000000
NoNewPrivs: 0
Seccomp: 0
Seccomp_filters: 0
Speculation_Store_Bypass: thread vulnerable
Cpus_allowed: fff
Cpus_allowed_list: 0-11
Mems_allowed: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001
Mems_allowed_list: 0
voluntary_ctxt_switches: 223
nonvoluntary_ctxt_switches: 25
`)

// BenchmarkParseKeyValue measures parseKeyValue over the testProcStatus
// sample using a callback that does no work, so only the parsing cost is
// timed.
func BenchmarkParseKeyValue(b *testing.B) {
	// Declared once outside the loop so the callback is not part of the
	// per-iteration work.
	noop := func(key, value []byte) error { return nil }

	// Always report B/op and allocs/op, even when -benchmem is not passed.
	b.ReportAllocs()

	for i := 0; i < b.N; i++ {
		// A parse failure would mean the benchmark is timing the error path
		// rather than a full parse, so abort instead of ignoring it.
		if err := parseKeyValue(testProcStatus, ':', noop); err != nil {
			b.Fatal(err)
		}
	}
}

// FuzzParseKeyValue feeds arbitrary strings to parseKeyValue with ':' as the
// separator. The seed corpus covers a line without a separator, a key with no
// value, a key with a blank value, and an ordinary pair.
func FuzzParseKeyValue(f *testing.F) {
	for _, seed := range [...]string{
		"no_separator",
		"no_value:",
		"empty_value: ",
		"normal: 223",
	} {
		f.Add(seed)
	}

	discard := func(key, value []byte) error { return nil }

	f.Fuzz(func(t *testing.T, input string) {
		// The returned error is deliberately ignored: the target makes no
		// assertion about the result, it only exercises the parser.
		_ = parseKeyValue([]byte(input), ':', discard)
	})
}
2 changes: 1 addition & 1 deletion providers/linux/vmstat.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func parseVMStat(content []byte) (*types.VMStatInfo, error) {
var vmStat types.VMStatInfo
refValues := reflect.ValueOf(&vmStat).Elem()

err := parseKeyValue(content, " ", func(key, value []byte) error {
err := parseKeyValue(content, ' ', func(key, value []byte) error {
// turn our []byte value into an int
val, err := parseBytesOrNumber(value)
if err != nil {
Expand Down

0 comments on commit 865a715

Please sign in to comment.