From 865a7152e96a731c33d9b11339d3af02295aff1c Mon Sep 17 00:00:00 2001 From: Mathilde Gilles Date: Wed, 16 Aug 2023 16:49:06 +0200 Subject: [PATCH] providers/linux: optimize parseKeyValue (#186) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This optimizes `parseKeyValue()` function in the linux provider. It reduces the CPU cost and completely eliminates memory allocations. ``` │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ ParseKeyValue-10 3540.0n ± ∞ ¹ 972.6n ± ∞ ¹ -72.53% (p=0.008 n=5) │ B/op │ B/op vs base │ ParseKeyValue-10 6.672Ki ± ∞ ¹ 0.000Ki ± ∞ ¹ -100.00% (p=0.008 n=5) │ allocs/op │ allocs/op vs base │ ParseKeyValue-10 58.00 ± ∞ ¹ 0.00 ± ∞ ¹ -100.00% (p=0.008 n=5) ``` --------- Co-authored-by: Andrew Kroh Co-authored-by: Dan Kortschak <90160302+efd6@users.noreply.github.com> --- .changelog/186.txt | 3 + providers/linux/capabilities_linux.go | 2 +- providers/linux/memory_linux.go | 2 +- providers/linux/process_linux.go | 2 +- providers/linux/seccomp_linux.go | 2 +- providers/linux/util.go | 24 ++-- providers/linux/util_test.go | 162 ++++++++++++++++++++++++++ providers/linux/vmstat.go | 2 +- 8 files changed, 186 insertions(+), 13 deletions(-) create mode 100644 .changelog/186.txt create mode 100644 providers/linux/util_test.go diff --git a/.changelog/186.txt b/.changelog/186.txt new file mode 100644 index 00000000..6f823f28 --- /dev/null +++ b/.changelog/186.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +linux: optimize linux key value parsing (ie: /proc files) +``` diff --git a/providers/linux/capabilities_linux.go b/providers/linux/capabilities_linux.go index b1af31de..40bf454d 100644 --- a/providers/linux/capabilities_linux.go +++ b/providers/linux/capabilities_linux.go @@ -86,7 +86,7 @@ func capabilityName(num int) string { func readCapabilities(content []byte) (*types.CapabilityInfo, error) { var cap types.CapabilityInfo - err := parseKeyValue(content, ":", func(key, value []byte) error { + err := 
parseKeyValue(content, ':', func(key, value []byte) error { var err error switch string(key) { case "CapInh": diff --git a/providers/linux/memory_linux.go b/providers/linux/memory_linux.go index c04bad0d..c0c5ab85 100644 --- a/providers/linux/memory_linux.go +++ b/providers/linux/memory_linux.go @@ -29,7 +29,7 @@ func parseMemInfo(content []byte) (*types.HostMemoryInfo, error) { } hasAvailable := false - err := parseKeyValue(content, ":", func(key, value []byte) error { + err := parseKeyValue(content, ':', func(key, value []byte) error { num, err := parseBytesOrNumber(value) if err != nil { return fmt.Errorf("failed to parse %v value of %v: %w", string(key), string(value), err) diff --git a/providers/linux/process_linux.go b/providers/linux/process_linux.go index 10cb947e..52bae255 100644 --- a/providers/linux/process_linux.go +++ b/providers/linux/process_linux.go @@ -229,7 +229,7 @@ func (p *process) User() (types.UserInfo, error) { } var user types.UserInfo - err = parseKeyValue(content, ":", func(key, value []byte) error { + err = parseKeyValue(content, ':', func(key, value []byte) error { // See proc(5) for the format of /proc/[pid]/status switch string(key) { case "Uid": diff --git a/providers/linux/seccomp_linux.go b/providers/linux/seccomp_linux.go index d04bb3c7..fd38ea45 100644 --- a/providers/linux/seccomp_linux.go +++ b/providers/linux/seccomp_linux.go @@ -47,7 +47,7 @@ func (m SeccompMode) String() string { func readSeccompFields(content []byte) (*types.SeccompInfo, error) { var seccomp types.SeccompInfo - err := parseKeyValue(content, ":", func(key, value []byte) error { + err := parseKeyValue(content, ':', func(key, value []byte) error { switch string(key) { case "Seccomp": mode, err := strconv.ParseUint(string(value), 10, 8) diff --git a/providers/linux/util.go b/providers/linux/util.go index b8705a13..8d9c27df 100644 --- a/providers/linux/util.go +++ b/providers/linux/util.go @@ -26,20 +26,28 @@ import ( "strconv" ) -func parseKeyValue(content 
// parseKeyValue parses key/value pairs separated by the provided separator
// from each line in content and invokes callback once per pair, in input
// order.
//
// Lines are delimited by '\n'. Empty lines are ignored. Each remaining line is
// split at the first occurrence of separator: the bytes before it are passed
// as key unmodified, and the bytes after it are passed as value with leading
// and trailing white-space trimmed. A non-empty line that does not contain the
// separator aborts parsing with an error.
//
// If callback returns a non-nil error, parsing stops immediately and that
// error is returned unchanged, so callers can still compare or unwrap it.
//
// The key and value slices handed to callback are sub-slices of content; no
// copies are made. A callback that needs to retain them beyond the call must
// copy them.
func parseKeyValue(content []byte, separator byte, callback func(key, value []byte) error) error {
	var line []byte

	for len(content) > 0 {
		// Peel off one line; the "found" result is irrelevant because a final
		// line without a trailing newline is still a valid line.
		line, content, _ = bytes.Cut(content, []byte{'\n'})
		if len(line) == 0 {
			continue
		}

		key, value, ok := bytes.Cut(line, []byte{separator})
		if !ok {
			return fmt.Errorf("separator %q not found", separator)
		}

		// Propagate callback failures instead of discarding them; callers
		// report value-parsing errors through this return path.
		if err := callback(key, bytes.TrimSpace(value)); err != nil {
			return err
		}
	}

	return nil
}
See the License for the +// specific language governing permissions and limitations +// under the License. + +package linux + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseKeyValueNoEOL(t *testing.T) { + vals := [][2]string{} + err := parseKeyValue([]byte( + "Name: zsh\nUmask: 0022\nState: S (sleeping)\nUid: 1000 1000 1000 1000", + ), ':', func(key, value []byte) error { + vals = append(vals, [2]string{string(key), string(value)}) + return nil + }) + assert.NoError(t, err) + + assert.Equal(t, [][2]string{ + {"Name", "zsh"}, + {"Umask", "0022"}, + {"State", "S (sleeping)"}, + {"Uid", "1000\t1000\t1000\t1000"}, + }, vals) +} + +func TestParseKeyValueEmptyLine(t *testing.T) { + vals := [][2]string{} + err := parseKeyValue([]byte( + "Name: zsh\nUmask: 0022\nState: S (sleeping)\n\nUid: 1000 1000 1000 1000", + ), ':', func(key, value []byte) error { + vals = append(vals, [2]string{string(key), string(value)}) + return nil + }) + assert.NoError(t, err) + + assert.Equal(t, [][2]string{ + {"Name", "zsh"}, + {"Umask", "0022"}, + {"State", "S (sleeping)"}, + {"Uid", "1000\t1000\t1000\t1000"}, + }, vals) +} + +func TestParseKeyValueEOL(t *testing.T) { + vals := [][2]string{} + err := parseKeyValue([]byte( + "Name: zsh\nUmask: 0022\nState: S (sleeping)\nUid: 1000 1000 1000 1000\n", + ), ':', func(key, value []byte) error { + vals = append(vals, [2]string{string(key), string(value)}) + return nil + }) + assert.NoError(t, err) + + assert.Equal(t, [][2]string{ + {"Name", "zsh"}, + {"Umask", "0022"}, + {"State", "S (sleeping)"}, + {"Uid", "1000\t1000\t1000\t1000"}, + }, vals) +} + +// from cat /proc/$$/status +var testProcStatus = []byte(`Name: zsh +Umask: 0022 +State: S (sleeping) +Tgid: 4023363 +Ngid: 0 +Pid: 4023363 +PPid: 4023357 +TracerPid: 0 +Uid: 1000 1000 1000 1000 +Gid: 1000 1000 1000 1000 +FDSize: 64 +Groups: 24 25 27 29 30 44 46 102 109 112 116 119 131 998 1000 +NStgid: 4023363 +NSpid: 4023363 +NSpgid: 4023363 +NSsid: 4023363 +VmPeak: 
15596 kB +VmSize: 15144 kB +VmLck: 0 kB +VmPin: 0 kB +VmHWM: 9060 kB +VmRSS: 8716 kB +RssAnon: 3828 kB +RssFile: 4888 kB +RssShmem: 0 kB +VmData: 3500 kB +VmStk: 328 kB +VmExe: 600 kB +VmLib: 2676 kB +VmPTE: 68 kB +VmSwap: 0 kB +HugetlbPages: 0 kB +CoreDumping: 0 +THP_enabled: 1 +Threads: 1 +SigQ: 0/126683 +SigPnd: 0000000000000000 +ShdPnd: 0000000000000000 +SigBlk: 0000000000000002 +SigIgn: 0000000000384000 +SigCgt: 0000000008013003 +CapInh: 0000000000000000 +CapPrm: 0000000000000000 +CapEff: 0000000000000000 +CapBnd: 000001ffffffffff +CapAmb: 0000000000000000 +NoNewPrivs: 0 +Seccomp: 0 +Seccomp_filters: 0 +Speculation_Store_Bypass: thread vulnerable +Cpus_allowed: fff +Cpus_allowed_list: 0-11 +Mems_allowed: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001 +Mems_allowed_list: 0 +voluntary_ctxt_switches: 223 +nonvoluntary_ctxt_switches: 25 +`) + +func BenchmarkParseKeyValue(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = parseKeyValue(testProcStatus, ':', func(key, value []byte) error { + return nil + }) + } +} + +func FuzzParseKeyValue(f *testing.F) { + testcases := []string{ + "no_separator", + "no_value:", + "empty_value: ", + "normal: 223", + } + for _, tc := range testcases { + f.Add(tc) + } + f.Fuzz(func(t *testing.T, orig string) { + _ = parseKeyValue([]byte(orig), ':', func(key, value []byte) error { + return nil + }) + }) +} diff --git a/providers/linux/vmstat.go b/providers/linux/vmstat.go index 2b9e8780..ea918c84 100644 --- a/providers/linux/vmstat.go +++ b/providers/linux/vmstat.go @@ -45,7 +45,7 @@ func parseVMStat(content []byte) (*types.VMStatInfo, error) { var vmStat types.VMStatInfo refValues := reflect.ValueOf(&vmStat).Elem() - err := parseKeyValue(content, " ", func(key, value []byte) error { + err := 
parseKeyValue(content, ' ', func(key, value []byte) error { // turn our []byte value into an int val, err := parseBytesOrNumber(value) if err != nil {