From a32f9ca2ff0924343eab326eae718ebe30de4cb7 Mon Sep 17 00:00:00 2001 From: fredbi Date: Tue, 7 Feb 2023 08:11:02 +0100 Subject: [PATCH] Perf(reporthandling): uniqueStrings & trimunique (#81) * introduced internal package for optimized versions of slice unique & trim * profiled & benchmarked internal/slices package * moved helpers call for listing to the internal package * adapted reporthandling to use internal/slices * removed erroneous comment about make map, corrected benchmark * improved slice alloc on trimmed resources * fixup listing failed to unique Signed-off-by: Frederic BIDON * fix: renamed package for newly added test Signed-off-by: Frederic BIDON --------- Signed-off-by: Frederic BIDON --- reporthandling/datastructuresmethodshelper.go | 17 +- reporthandling/helpers/v1/cloudmetadata.go | 2 +- .../helpers/v1/cloudmetadata_test.go | 2 +- reporthandling/helpers/v1/filters.go | 2 +- reporthandling/helpers/v1/filters_test.go | 2 +- reporthandling/helpers/v1/listing.go | 64 ++--- .../helpers/v1/listing_benchmark_test.go | 20 ++ reporthandling/helpers/v1/listing_mocks.go | 2 +- reporthandling/helpers/v1/listing_test.go | 82 +----- reporthandling/helpers/v1/status.go | 2 +- reporthandling/helpers/v1/status_test.go | 2 +- reporthandling/internal/slices/README.md | 69 +++++ .../internal/slices/string_slices.go | 189 ++++++++++++ .../slices/string_slices_benchmark_test.go | 185 ++++++++++++ .../internal/slices/string_slices_test.go | 269 ++++++++++++++++++ reporthandling/resultshandling.go | 39 +-- 16 files changed, 791 insertions(+), 157 deletions(-) create mode 100644 reporthandling/helpers/v1/listing_benchmark_test.go create mode 100644 reporthandling/internal/slices/README.md create mode 100644 reporthandling/internal/slices/string_slices.go create mode 100644 reporthandling/internal/slices/string_slices_benchmark_test.go create mode 100644 reporthandling/internal/slices/string_slices_test.go diff --git a/reporthandling/datastructuresmethodshelper.go b/reporthandling/datastructuresmethodshelper.go index d2dcfedd..fb3223f9 100644 --- a/reporthandling/datastructuresmethodshelper.go +++ b/reporthandling/datastructuresmethodshelper.go @@ -1,5 +1,9 @@ package reporthandling +import ( + "github.com/kubescape/opa-utils/reporthandling/internal/slices" +) + type ResourcesIDs struct { passedResources []string failedResources []string @@ -31,17 +35,17 @@ func (r *ResourcesIDs) setFailedResources(a []string) { // setWarningResources - initialized after failed resources are set func (r *ResourcesIDs) setWarningResources(a []string) { - r.warningResources = TrimUniqueIDs(GetUniqueResourcesIDs(a), r.failedResources) + r.warningResources = slices.TrimStable(GetUniqueResourcesIDs(a), r.failedResources) } // setPassedResources - initialized after warning resources are set func (r *ResourcesIDs) setPassedResources(a []string) { - r.passedResources = TrimUniqueIDs(GetUniqueResourcesIDs(a), append(r.failedResources, r.warningResources...)) + r.passedResources = slices.TrimStable(GetUniqueResourcesIDs(a), append(r.failedResources, r.warningResources...)) } func deleteFromMap(m map[string]interface{}, keepFields []string) { for k := range m { - if StringInSlice(keepFields, k) { + if slices.StringInSlice(keepFields, k) { continue } delete(m, k) @@ -49,12 +53,7 @@ func deleteFromMap(m map[string]interface{}, keepFields []string) { } func StringInSlice(strSlice []string, str string) bool { - for i := range strSlice { - if strSlice[i] == str { - return true - } - } - return false + return slices.StringInSlice(strSlice, str) } func RemoveResponse(slice []RuleResponse, index int) []RuleResponse { diff --git a/reporthandling/helpers/v1/cloudmetadata.go b/reporthandling/helpers/v1/cloudmetadata.go index a1c91f59..e6a7740a 100644 --- a/reporthandling/helpers/v1/cloudmetadata.go +++ b/reporthandling/helpers/v1/cloudmetadata.go @@ -1,4 +1,4 @@ -package v1 +package helpers import ( "fmt" diff --git a/reporthandling/helpers/v1/cloudmetadata_test.go b/reporthandling/helpers/v1/cloudmetadata_test.go index 5150963a..260ae966 100644 --- a/reporthandling/helpers/v1/cloudmetadata_test.go +++ b/reporthandling/helpers/v1/cloudmetadata_test.go @@ -1,4 +1,4 @@ -package v1 +package helpers import ( "reflect" diff --git a/reporthandling/helpers/v1/filters.go b/reporthandling/helpers/v1/filters.go index a635b2b2..ae7c0912 100644 --- a/reporthandling/helpers/v1/filters.go +++ b/reporthandling/helpers/v1/filters.go @@ -1,4 +1,4 @@ -package v1 +package helpers import ( "github.com/armosec/armoapi-go/armotypes" diff --git a/reporthandling/helpers/v1/filters_test.go b/reporthandling/helpers/v1/filters_test.go index 1d128e29..6e3b7c3a 100644 --- a/reporthandling/helpers/v1/filters_test.go +++ b/reporthandling/helpers/v1/filters_test.go @@ -1,4 +1,4 @@ -package v1 +package helpers import ( "testing" diff --git a/reporthandling/helpers/v1/listing.go b/reporthandling/helpers/v1/listing.go index 41ff9c61..576870ba 100644 --- a/reporthandling/helpers/v1/listing.go +++ b/reporthandling/helpers/v1/listing.go @@ -1,8 +1,8 @@ -package v1 +package helpers import ( - "github.com/armosec/utils-go/str" "github.com/kubescape/opa-utils/reporthandling/apis" + "github.com/kubescape/opa-utils/reporthandling/internal/slices" ) // ReportObject any report object must be compliment with a map[string]interface{} structures @@ -104,10 +104,10 @@ func (all *AllLists) Update(all2 *AllLists) { // ToUnique - Call this function only when setting the List func (all *AllLists) toUniqueBase() { // remove duplications from each resource list - all.failed = str.SliceStringToUnique(all.failed) - all.passed = str.SliceStringToUnique(all.passed) - all.skipped = str.SliceStringToUnique(all.skipped) - all.other = str.SliceStringToUnique(all.other) + all.failed = slices.UniqueStrings(all.failed) + all.passed = slices.UniqueStrings(all.passed) + all.skipped = slices.UniqueStrings(all.skipped) + all.other = slices.UniqueStrings(all.other) } // ToUnique - Call this function only when setting the List @@ -117,44 +117,28 @@ func (all *AllLists) ToUniqueControls() { // ToUnique - Call this function only when setting the List func (all *AllLists) ToUniqueResources() { - all.toUniqueBase() - // remove failed from passed list - all.passed = trimUnique(all.passed, all.failed) - // remove failed, and passed from skipped list - trimmed := append(all.failed, all.passed...) - all.skipped = trimUnique(all.skipped, trimmed) - // remove failed, passed and skipped from other list - trimmed = append(trimmed, all.skipped...) - all.other = trimUnique(all.other, trimmed) -} + all.failed = slices.UniqueStrings(all.failed) -// trimUnique trim the list, return original list without the "trimFrom" list. the list is trimmed in place, so the original list is modified. Also, the list is not sorted -func trimUnique(origin, trimFrom []string) []string { - if len(origin) == 0 || len(trimFrom) == 0 { // if there is nothing to trim - return origin - } - toRemove := make(map[string]bool, len(trimFrom)) + const heuristicCapacity = 100 // alloc 100 slots to the stack. The rest would go to the heap - see https://github.com/golang/go/issues/58215 + trimmed := append(make([]string, 0, heuristicCapacity), make([]string, 0, max(len(all.failed)+len(all.excluded)+len(all.passed)+len(all.skipped), heuristicCapacity)-heuristicCapacity)...) - for i := range trimFrom { - toRemove[trimFrom[i]] = true - } + // remove failed and excluded from passed list + trimmed = append(trimmed, all.failed...) + all.passed = slices.TrimStableUnique(all.passed, trimmed) - originLen := len(origin) - for i := 0; i < originLen; { - if _, ok := toRemove[origin[i]]; ok { - str.RemoveIndexFromStringSlice(&origin, i) - originLen-- - } else { - i++ - } - } - return origin + // remove failed, excluded and passed from skipped list + trimmed = append(trimmed, all.passed...) + all.skipped = slices.TrimStableUnique(all.skipped, trimmed) + + // remove failed, excluded, passed and skipped from other list + trimmed = append(trimmed, all.skipped...) + all.other = slices.TrimStableUnique(all.other, trimmed) } -// appendSlice append a slice to a slice the index indicates the position of the slice -func appendSlice(origin, appendTo []string, index *int) { - for i := range appendTo { - origin[*index] = appendTo[i] - *index++ +func max(a, b int) int { + if a > b { + return a } + + return b } diff --git a/reporthandling/helpers/v1/listing_benchmark_test.go b/reporthandling/helpers/v1/listing_benchmark_test.go new file mode 100644 index 00000000..ae38f8b9 --- /dev/null +++ b/reporthandling/helpers/v1/listing_benchmark_test.go @@ -0,0 +1,20 @@ +package helpers + +import ( + "testing" + + "github.com/kubescape/opa-utils/reporthandling/apis" +) + +func BenchmarkToUniqueResources(b *testing.B) { + listA := mockAllListsA() + listA.Append(apis.StatusExcluded, "b") + listA.Append(apis.StatusExcluded, "b") + listA.Append(apis.StatusExcluded, "b") + b.ResetTimer() + b.ReportAllocs() + + for n := 0; n < b.N; n++ { + listA.ToUniqueResources() + } +} diff --git a/reporthandling/helpers/v1/listing_mocks.go b/reporthandling/helpers/v1/listing_mocks.go index 97275adf..52531e72 100644 --- a/reporthandling/helpers/v1/listing_mocks.go +++ b/reporthandling/helpers/v1/listing_mocks.go @@ -1,4 +1,4 @@ -package v1 +package helpers func MockAllListsForIntegration() *AllLists { return &AllLists{ diff --git a/reporthandling/helpers/v1/listing_test.go b/reporthandling/helpers/v1/listing_test.go index 7614934f..9c7bb365 100644 --- a/reporthandling/helpers/v1/listing_test.go +++ b/reporthandling/helpers/v1/listing_test.go @@ -1,4 +1,4 @@ -package v1 +package helpers import ( "testing" @@ -122,6 +122,7 @@ func TestAllListsUniqueControls(t *testing.T) { assert.Equal(t, 1, len(listMock.Skipped())) } +/* appendSlice and intToPointer are unused for now func TestAppendSlice(t *testing.T) { type args struct { origin []string @@ -160,81 +161,16 @@ func TestAppendSlice(t *testing.T) { }) } } + func intToPointer(i int) *int { return &i } -func TestTrimUnique(t *testing.T) { - type args struct { - origin []string - trimFrom []string - expected []string - } - tests := []struct { - name string - args args - }{ - { - name: "trim from begging of slice", - args: args{ - origin: []string{"a", "b", "c"}, - trimFrom: []string{"a"}, - expected: []string{"c", "b"}, - }, - }, - { - name: "trim from middle of slice", - args: args{ - origin: []string{"a", "b", "c"}, - trimFrom: []string{"b"}, - expected: []string{"a", "c"}, - }, - }, - { - name: "trim from end of slice", - args: args{ - origin: []string{"a", "b", "c"}, - trimFrom: []string{"c"}, - expected: []string{"a", "b"}, - }, - }, - { - name: "do nothing", - args: args{ - origin: []string{"a", "b", "c"}, - trimFrom: []string{"d"}, - expected: []string{"a", "b", "c"}, - }, - }, - { - name: "trim all", - args: args{ - origin: []string{"a", "b", "c"}, - trimFrom: []string{"a", "b", "c"}, - expected: []string{}, - }, - }, - { - name: "trimFrom larger", - args: args{ - origin: []string{"a", "b", "c"}, - trimFrom: []string{"a", "b", "e", "d"}, - expected: []string{"c"}, - }, - }, - { - name: "trim all not sorted", - args: args{ - origin: []string{"c", "a", "b"}, - trimFrom: []string{"a", "b", "c"}, - expected: []string{}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - dd := trimUnique(tt.args.origin, tt.args.trimFrom) - assert.Equal(t, tt.args.expected, dd) - }) +// appendSlice append a slice to a slice the index indicates the position of the slice +func appendSlice(origin, appendTo []string, index *int) { + for i := range appendTo { + origin[*index] = appendTo[i] + *index++ } } +*/ diff --git a/reporthandling/helpers/v1/status.go b/reporthandling/helpers/v1/status.go index 2d92f3d7..2058becb 100644 --- a/reporthandling/helpers/v1/status.go +++ b/reporthandling/helpers/v1/status.go @@ -1,4 +1,4 @@ -package v1 +package helpers import "github.com/kubescape/opa-utils/reporthandling/apis" diff --git a/reporthandling/helpers/v1/status_test.go b/reporthandling/helpers/v1/status_test.go index f94d4117..02e783d6 100644 --- a/reporthandling/helpers/v1/status_test.go +++ b/reporthandling/helpers/v1/status_test.go @@ -1,4 +1,4 @@ -package v1 +package helpers import ( "testing" diff --git a/reporthandling/internal/slices/README.md b/reporthandling/internal/slices/README.md new file mode 100644 index 00000000..6270b980 --- /dev/null +++ b/reporthandling/internal/slices/README.md @@ -0,0 +1,69 @@ +# slices + +Internal utility functions that manipulate slices of strings. + +All these methods alter their input slice and do not allocate extra memory. + +`UniqueStrings` is provided as a faster equivalent to `github.com/armosec/utils-go/str.SliceStringToUnique`. + +`Trim` is the same as the previously private function `trimUnique()` with similar performances. Code is just more straightforward. + +`TrimStable` has the same intent as `Trim` but does not alter the order of the input. + +`TrimUnique` and `TrimStableUnique` combine `UniqueStrings` and `Trim` (resp. `TrimStable`) in a single iteration of the input slice. + +## Benchmarks + +``` +go test -v -run Bench -bench . -benchtime 1s +goos: linux +goarch: amd64 +pkg: github.com/kubescape/opa-utils/reporthandling/internal/slices +cpu: AMD Ryzen 7 5800X 8-Core Processor +BenchmarkUnique +BenchmarkUnique/UniqueStrings_x_8 +BenchmarkUnique/UniqueStrings_x_8-16 6007258 192.3 ns/op 0 B/op 0 allocs/op +BenchmarkUnique/UniqueStrings_x_16 +BenchmarkUnique/UniqueStrings_x_16-16 3084657 386.0 ns/op 0 B/op 0 allocs/op +BenchmarkUnique/UniqueStrings_x_32 +BenchmarkUnique/UniqueStrings_x_32-16 3148867 387.5 ns/op 0 B/op 0 allocs/op +BenchmarkUnique/SliceStringToUnique_x_8 +BenchmarkUnique/SliceStringToUnique_x_8-16 2188087 543.4 ns/op 64 B/op 1 allocs/op +BenchmarkUnique/SliceStringToUnique_x_16 +BenchmarkUnique/SliceStringToUnique_x_16-16 2219115 544.2 ns/op 64 B/op 1 allocs/op +BenchmarkUnique/SliceStringToUnique_x_32 +BenchmarkUnique/SliceStringToUnique_x_32-16 2175615 548.6 ns/op 64 B/op 1 allocs/op +BenchmarkTrim +BenchmarkTrim/Trim_x_8 +BenchmarkTrim/Trim_x_8-16 5328590 222.1 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/Trim_x_16 +BenchmarkTrim/Trim_x_16-16 2748080 422.4 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/Trim_x_32 +BenchmarkTrim/Trim_x_32-16 2732326 425.8 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimStable_x_8 +BenchmarkTrim/TrimStable_x_8-16 5337442 216.5 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimStable_x_16 +BenchmarkTrim/TrimStable_x_16-16 2752138 435.2 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimStable_x_32 +BenchmarkTrim/TrimStable_x_32-16 2770256 432.0 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimSwap_x_8_(original_version) +BenchmarkTrim/TrimSwap_x_8_(original_version)-16 5299237 225.4 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimSwap_x_16_(original_version) +BenchmarkTrim/TrimSwap_x_16_(original_version)-16 2744328 437.5 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimSwap_x_32_(original_version) +BenchmarkTrim/TrimSwap_x_32_(original_version)-16 2800281 440.1 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimUnique_x_8 +BenchmarkTrim/TrimUnique_x_8-16 4220919 284.7 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimUnique_x_16 +BenchmarkTrim/TrimUnique_x_16-16 2469658 482.6 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimUnique_x_32 +BenchmarkTrim/TrimUnique_x_32-16 2408644 494.1 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimStableUnique_x_8 +BenchmarkTrim/TrimStableUnique_x_8-16 4250454 288.0 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimStableUnique_x_16 +BenchmarkTrim/TrimStableUnique_x_16-16 2390988 486.5 ns/op 0 B/op 0 allocs/op +BenchmarkTrim/TrimStableUnique_x_32 +BenchmarkTrim/TrimStableUnique_x_32-16 2416376 495.0 ns/op 0 B/op 0 allocs/op +PASS +ok +``` diff --git a/reporthandling/internal/slices/string_slices.go b/reporthandling/internal/slices/string_slices.go new file mode 100644 index 00000000..d81e14ce --- /dev/null +++ b/reporthandling/internal/slices/string_slices.go @@ -0,0 +1,189 @@ +package slices + +// UniqueStrings returns the unique (unsorted) values of a slice. +// +// NOTE: this function does not allocate extra memory: the input slice is altered in-place. +// +// The returned slice is truncated in capacity to the number of unique elements. +func UniqueStrings(strSlice []string) []string { + if len(strSlice) < 2 { + return strSlice + } + + strMap := make(map[string]struct{}, len(strSlice)) + uniqueIndex := 1 + strMap[strSlice[0]] = struct{}{} + + for _, val := range strSlice[1:] { + if _, isDupe := strMap[val]; isDupe { + continue + } + + strSlice[uniqueIndex] = val + strMap[val] = struct{}{} + uniqueIndex++ + } + + return strSlice[:uniqueIndex:uniqueIndex] +} + +// Trim returns the origin slice with all the elements from "trimFrom" removed. +// +// The original order of the original elements may be altered. +// +// NOTE: this function does not allocate extra memory: the input slice is altered in-place. +// +// The returned slice is truncated in capacity to the number of unique elements. +func Trim(origin, trimFrom []string) []string { + length := len(origin) + if length == 0 || len(trimFrom) == 0 { // nothing to trim + return origin + } + + // For a map that does not escape the func, the go runtime will grow the buckets in the stack if we keep the initial hint to 0. + // See our comment above for function UniqueStrings(). + inTrimList := make(map[string]struct{}, len(trimFrom)) + + for _, val := range trimFrom { + inTrimList[val] = struct{}{} + } + + // NOTE: this version is very similar to the original "trimUnique" implementation, but does not + // need an extra function that manipulates pointers. Its performance is also about the same. + for i := 0; i < length; { + if _, found := inTrimList[origin[i]]; found { + origin[i] = origin[length-1] // drop the i-th element and replace it by the last element + length-- + + continue + } + + i++ + } + + return origin[:length:length] +} + +// TrimStable returns the origin slice with all the elements from "trimFrom" removed. +// +// The original order of the original elements is maintained. Memory and CPU efficiency is about the same as Trim(). +// +// NOTE: this function does not allocate extra memory: the input slice is altered in-place. +// +// The returned slice is truncated in capacity to the number of unique elements. +func TrimStable(origin, trimFrom []string) []string { + length := len(origin) + if length == 0 || len(trimFrom) == 0 { // nothing to trim + return origin + } + + // See our comment above for function UniqueStrings(). + inTrimList := make(map[string]struct{}, len(trimFrom)) + + for _, val := range trimFrom { + inTrimList[val] = struct{}{} + } + + trimmedIndex := 0 + for i, val := range origin { + if _, found := inTrimList[val]; !found { + if trimmedIndex < i { // copy only if shifting has begun + origin[trimmedIndex] = val // shift the current value left + } + trimmedIndex++ + } + } + + return origin[:trimmedIndex:trimmedIndex] +} + +// TrimStableUnique combines UniqueStrings and Trim in a single iteration. +// +// It returns the unique elements of the origin slice with all the elements from "trimFrom" removed. +// +// NOTE: this function does not allocate extra memory: the input slice is altered in-place. +// +// The returned slice is truncated in capacity to the number of unique elements. +// +// Calling TrimUnique in combination is slighly more efficient than calling these functions separately +// (the longer the slices, the larger the savings). +func TrimStableUnique(origin, trimFrom []string) []string { + if len(origin) == 0 || len(trimFrom) == 0 { // nothing to trim + return origin + } + + // See our comment above for function UniqueStrings(). + inTrimList := make(map[string]struct{}, len(trimFrom)) + strMap := make(map[string]struct{}, len(origin)) + + for _, val := range trimFrom { + inTrimList[val] = struct{}{} + } + + trimmedIndex := 0 + for _, val := range origin { + if _, isDupe := strMap[val]; !isDupe { + strMap[val] = struct{}{} + + if _, found := inTrimList[val]; !found { + origin[trimmedIndex] = val + trimmedIndex++ + } + } + } + + return origin[:trimmedIndex:trimmedIndex] +} + +// TrimUnique combines UniqueStrings and Trim in a single iteration. +// +// It returns the unique elements of the origin slice with all the elements from "trimFrom" removed. +// +// NOTE: this function does not allocate extra memory: the input slice is altered in-place. +// +// The returned slice is truncated in capacity to the number of unique elements. +// +// Calling TrimUnique in combination is slighly more efficient than calling these functions separately +// (the longer the slices, the larger the savings). +func TrimUnique(origin, trimFrom []string) []string { + length := len(origin) + if len(origin) == 0 || len(trimFrom) == 0 { // nothing to trim + return origin + } + + inTrimList := make(map[string]struct{}, len(trimFrom)) + strMap := make(map[string]struct{}, len(origin)) + + for _, val := range trimFrom { + inTrimList[val] = struct{}{} + } + + for i := 0; i < length; { + val := origin[i] + if _, isDupe := strMap[val]; !isDupe { + strMap[val] = struct{}{} + + if _, found := inTrimList[val]; found { + origin[i] = origin[length-1] // drop the i-th element and replace if by the last element + length-- + + continue + } + } + + i++ + } + + return origin[:length:length] +} + +// StringInSlice determines if a needle is in a haystack of strings. +func StringInSlice(haystack []string, needle string) bool { + for _, val := range haystack { + if val == needle { + return true + } + } + + return false +} diff --git a/reporthandling/internal/slices/string_slices_benchmark_test.go b/reporthandling/internal/slices/string_slices_benchmark_test.go new file mode 100644 index 00000000..01b2b42b --- /dev/null +++ b/reporthandling/internal/slices/string_slices_benchmark_test.go @@ -0,0 +1,185 @@ +package slices + +import ( + "math/rand" + "testing" + + "github.com/armosec/utils-go/str" +) + +var ( + fixedInput8, fixedInput16, fixedInput32, manyInput32, fixedTrimList []string + currentInput8, currentInput16, currentInput32, currentMany32, currentTrimList []string +) + +func init() { + // We make sure that we don't bias in benchmark alloc reporting. + // Allocation is performed only once, and each (serialized) benchmark + // copies the original values to the allocated space. + // + // Since the benchmarked methods rely on in-place modification, we want to + // compare every iteration with the same original input, while not introducing + // extraneous allocation due to the benchmarking machinery. + fixedInput8 = []string{ + "A", "C", "B", "B", "A", "D", "A", "C", + } + fixedInput16 = []string{ + "A", "C", "B", "B", "A", "D", "A", "C", + "A", "C", "B", "B", "A", "D", "A", "C", + } + fixedInput32 = []string{ + "A", "C", "B", "B", "A", "D", "A", "C", + "A", "C", "B", "B", "A", "D", "A", "C", + "A", "C", "B", "B", "A", "D", "A", "C", + "A", "C", "B", "B", "A", "D", "A", "C", + } + manyInput32 = []string{ + "A", "C", "B", "E", "A", "D", "F", "G", + "H", "H", "I", "J", "K", "L", "A", "C", + "M", "N", "B", "O", "P", "Q", "E", "R", + "S", "T", "U", "B", "V", "W", "X", "Y", + } + fixedTrimList = []string{ + "D", "A", "E", + } + + currentInput8 = make([]string, len(fixedInput8)) + currentInput16 = make([]string, len(fixedInput16)) + currentInput32 = make([]string, len(fixedInput32)) + currentMany32 = make([]string, len(manyInput32)) + currentTrimList = make([]string, len(fixedTrimList)) +} + +func input8() []string { + // copy original content, randomly shuffled + copy(currentInput8, fixedInput8) + + return shuffle(currentInput8) +} + +func input16() []string { + // copy original content, randomly shuffled + copy(currentInput16, fixedInput16) + + return shuffle(currentInput16) +} + +func input32() []string { + // copy original content, randomly shuffled + copy(currentInput16, fixedInput16) + + return shuffle(currentInput16) +} + +func many32() []string { + // copy original content, randomly shuffled + copy(currentInput16, fixedInput16) + + return shuffle(currentInput16) +} + +func trimList() []string { + copy(currentTrimList, fixedTrimList) + + return shuffle(currentTrimList) +} + +// shuffle a slice +func shuffle(input []string) []string { + for i := 0; i < len(input); i++ { + j := rand.Intn(len(input)) //nolint:gosec + input[i], input[j] = input[j], input[i] + } + + return input +} + +func BenchmarkUnique(b *testing.B) { + // do not run in parallel + + b.Run("UniqueStrings x 8", runUniqueBenchmark(UniqueStrings, input8)) + b.Run("UniqueStrings x 16", runUniqueBenchmark(UniqueStrings, input16)) + b.Run("UniqueStrings x 32", runUniqueBenchmark(UniqueStrings, input32)) + b.Run("UniqueStrings x 32 (many)", runUniqueBenchmark(UniqueStrings, many32)) + + b.Run("SliceStringToUnique x 8", runUniqueBenchmark(str.SliceStringToUnique, input16)) + b.Run("SliceStringToUnique x 16", runUniqueBenchmark(str.SliceStringToUnique, input16)) + b.Run("SliceStringToUnique x 32", runUniqueBenchmark(str.SliceStringToUnique, input32)) + b.Run("SliceStringToUnique x 32 (many)", runUniqueBenchmark(str.SliceStringToUnique, many32)) +} + +func BenchmarkTrim(b *testing.B) { + // do not run in parallel + + b.Run("Trim x 8", runTrimBenchmark(Trim, input8, trimList)) + b.Run("Trim x 16", runTrimBenchmark(Trim, input16, trimList)) + b.Run("Trim x 32", runTrimBenchmark(Trim, input32, trimList)) + b.Run("Trim x 32 (many)", runTrimBenchmark(Trim, many32, trimList)) + + b.Run("TrimStable x 8", runTrimBenchmark(TrimStable, input8, trimList)) + b.Run("TrimStable x 16", runTrimBenchmark(TrimStable, input16, trimList)) + b.Run("TrimStable x 32", runTrimBenchmark(TrimStable, input32, trimList)) + b.Run("TrimStable x 32", runTrimBenchmark(TrimStable, many32, trimList)) + + b.Run("TrimSwap x 8 (original version)", runTrimBenchmark(originalTrimmer, input8, trimList)) + b.Run("TrimSwap x 16 (original version)", runTrimBenchmark(originalTrimmer, input16, trimList)) + b.Run("TrimSwap x 32 (original version)", runTrimBenchmark(originalTrimmer, input32, trimList)) + b.Run("TrimSwap x 32 (many, original version)", runTrimBenchmark(originalTrimmer, many32, trimList)) + + b.Run("TrimUnique x 8", runTrimBenchmark(TrimUnique, input8, trimList)) + b.Run("TrimUnique x 16", runTrimBenchmark(TrimUnique, input16, trimList)) + b.Run("TrimUnique x 32", runTrimBenchmark(TrimUnique, input32, trimList)) + b.Run("TrimUnique x 32 (many)", runTrimBenchmark(TrimUnique, many32, trimList)) + + b.Run("TrimStableUnique x 8", runTrimBenchmark(TrimStableUnique, input8, trimList)) + b.Run("TrimStableUnique x 16", runTrimBenchmark(TrimStableUnique, input16, trimList)) + b.Run("TrimStableUnique x 32", runTrimBenchmark(TrimStableUnique, input32, trimList)) + b.Run("TrimStableUnique x 32 (many)", runTrimBenchmark(TrimStableUnique, many32, trimList)) +} + +func runUniqueBenchmark(fn func([]string) []string, input func() []string) func(*testing.B) { + return func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + + for n := 0; n < b.N; n++ { + input := input() + _ = fn(input) + } + } +} + +func runTrimBenchmark(fn func([]string, []string) []string, input, trimList func() []string) func(*testing.B) { + return func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + + for n := 0; n < b.N; n++ { + input, trimList := input(), trimList() + _ = fn(input, trimList) + } + } +} + +// originalTrimmer reminds us of the original implementation of the trimming, for the purpose +// of comparing benchmarks. +func originalTrimmer(origin, trimFrom []string) []string { + if len(origin) == 0 || len(trimFrom) == 0 { // if there is nothing to trim + return origin + } + toRemove := make(map[string]bool, len(trimFrom)) + + for i := range trimFrom { + toRemove[trimFrom[i]] = true + } + originLen := len(origin) + for i := 0; i < originLen; { + if _, ok := toRemove[origin[i]]; ok { + str.RemoveIndexFromStringSlice(&origin, i) // swap i-th element with the last one + originLen-- + } else { + i++ + } + } + return origin +} diff --git a/reporthandling/internal/slices/string_slices_test.go b/reporthandling/internal/slices/string_slices_test.go new file mode 100644 index 00000000..b8cbb4e1 --- /dev/null +++ b/reporthandling/internal/slices/string_slices_test.go @@ -0,0 +1,269 @@ +package slices + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type ( + trimArgs struct { + Origin []string + TrimFrom []string + Expected []string + ExpectedStable []string + } + + trimFixture struct { + Name string + Args trimArgs + } +) + +// Clone the Origin slice to be used in parallel tests that modify their input in-place. +func (a trimArgs) Clone() trimArgs { + if a.Origin == nil { + return a + } + + clone := trimArgs{ + Origin: make([]string, len(a.Origin)), + TrimFrom: a.TrimFrom, + Expected: a.Expected, + ExpectedStable: a.ExpectedStable, + } + copy(clone.Origin, a.Origin) + + return clone +} + +func TestTrimAndTrimUnique(t *testing.T) { + t.Parallel() + + for _, toPin := range trimTestCases() { + tt := toPin + ts := tt + tu := tt + tsu := tt + // the Trim* functions modify their input slice: we need a deep-copy of the Origin argument + ts.Args = tt.Args.Clone() + tu.Args = tt.Args.Clone() + tsu.Args = tt.Args.Clone() + + // exercise our variations around trimming slices... + t.Run("Trim with "+tt.Name, func(t *testing.T) { + t.Parallel() + + dd := Trim(tt.Args.Origin, tt.Args.TrimFrom) + assert.Equal(t, tt.Args.Expected, dd) + assert.Equalf(t, len(tt.Args.Expected), cap(dd), + "expected capacity to be truncated but got %d", cap(dd), + ) + }) + + t.Run("TrimStable with "+ts.Name, func(t *testing.T) { + t.Parallel() + + dd := TrimStable(ts.Args.Origin, ts.Args.TrimFrom) + var expected []string + if ts.Args.ExpectedStable != nil { + // with the stable version, the expected slice comes in a different order + expected = ts.Args.ExpectedStable + } else { + expected = ts.Args.Expected + } + + assert.Equal(t, expected, dd) + assert.Equalf(t, len(expected), cap(dd), + "expected capacity to be truncated but got %d", cap(dd), + ) + }) + + t.Run("TrimUnique with "+tu.Name, func(t *testing.T) { + t.Parallel() + + dd := TrimUnique(tu.Args.Origin, tu.Args.TrimFrom) + assert.Equal(t, tu.Args.Expected, dd) + assert.Equalf(t, len(tu.Args.Expected), cap(dd), + "expected capacity to be truncated but got %d", cap(dd), + ) + }) + + t.Run("TrimStableUnique with "+tsu.Name, func(t *testing.T) { + t.Parallel() + + dd := TrimStableUnique(tsu.Args.Origin, tsu.Args.TrimFrom) + var expected []string + if tsu.Args.ExpectedStable != nil { + // with the stable version, the expected slice comes in a different order + expected = tsu.Args.ExpectedStable + } else { + expected = tsu.Args.Expected + } + + assert.Equal(t, expected, dd) + assert.Equalf(t, len(expected), cap(dd), + "expected capacity to be truncated but got %d", cap(dd), + ) + }) + } + + t.Run("TrimStable should trim but not dedupe", func(t *testing.T) { + assert.EqualValues(t, + []string{"c", "c"}, + TrimStable([]string{"c", "a", "b", "c", "b"}, []string{"a", "b", "e"}), + ) + }) + + t.Run("TrimStableUnique should dedupe and trim", func(t *testing.T) { + assert.EqualValues(t, + []string{"c"}, + TrimStableUnique([]string{"c", "a", "b", "c", "b"}, []string{"a", "b", "e"}), + ) + }) + + t.Run("Trim should trim but not dedupe", func(t *testing.T) { + assert.EqualValues(t, + []string{"c", "c"}, + Trim([]string{"c", "a", "b", "c", "b"}, []string{"a", "b", "e"}), + ) + }) + + t.Run("TrimUnique should dedupe and trim", func(t *testing.T) { + assert.EqualValues(t, + []string{"c"}, + TrimStableUnique([]string{"c", "a", "b", "c", "b"}, []string{"a", "b", "e"}), + ) + }) +} + +func TestUniqueSlice(t *testing.T) { + t.Parallel() + + t.Run("should yield unique strings", func(t *testing.T) { + t.Parallel() + + uniques := UniqueStrings([]string{"B", "B", "A", "A", "B", "C", "B", "A"}) + + require.EqualValues(t, []string{"B", "A", "C"}, uniques) + require.Equal(t, 3, cap(uniques), + "expected capacity to be truncated but got %d", cap(uniques), + ) + }) + + t.Run("should yield empty", func(t *testing.T) { + t.Parallel() + + require.Empty(t, UniqueStrings([]string{})) + }) + + t.Run("should yield nil", func(t *testing.T) { + t.Parallel() + + require.Empty(t, UniqueStrings(nil)) + }) +} + +func TestStringInslice(t *testing.T) { + t.Parallel() + + require.True(t, StringInSlice([]string{"A", "B", "C"}, "B")) + require.False(t, StringInSlice([]string{"A", "B", "C"}, "D")) + require.False(t, StringInSlice([]string{}, "D")) + require.False(t, StringInSlice(nil, "D")) +} + +func trimTestCases() []trimFixture { + return []trimFixture{ + { + Name: "trim from beginning of slice (exhibits swapped elements)", + Args: trimArgs{ + Origin: []string{"a", "b", "c"}, + TrimFrom: []string{"a"}, + Expected: []string{"c", "b"}, + ExpectedStable: []string{"b", "c"}, + }, + }, + { + Name: "trim from middle of slice", + Args: trimArgs{ + Origin: []string{"a", "b", "c"}, + TrimFrom: []string{"b"}, + Expected: []string{"a", "c"}, + }, + }, + { + Name: "trim from end of slice", + Args: trimArgs{ + Origin: []string{"a", "b", "c"}, + TrimFrom: []string{"c"}, + Expected: []string{"a", "b"}, + }, + }, + { + Name: "do nothing", + Args: trimArgs{ + Origin: []string{"a", "b", "c"}, + TrimFrom: []string{"d"}, + Expected: []string{"a", "b", "c"}, + }, + }, + { + Name: "trim all", + Args: trimArgs{ + Origin: []string{"a", "b", "c"}, + TrimFrom: []string{"a", "b", "c"}, + Expected: []string{}, + }, + }, + { + Name: "trimFrom larger", + Args: trimArgs{ + Origin: []string{"a", "b", "c"}, + TrimFrom: []string{"a", "b", "e", "d"}, + Expected: []string{"c"}, + }, + }, + { + Name: "trim all not sorted", + Args: trimArgs{ + Origin: []string{"c", "a", "b"}, + TrimFrom: []string{"a", "b", "c"}, + Expected: []string{}, + }, + }, + { + Name: "nothing to do (1)", + Args: trimArgs{ + Origin: []string{}, + TrimFrom: []string{"d"}, + Expected: []string{}, + }, + }, + { + Name: "nothing to do (2)", + Args: trimArgs{ + Origin: []string{"a"}, + TrimFrom: []string{}, + Expected: []string{"a"}, + }, + }, + { + Name: "with nil origin", + Args: trimArgs{ + Origin: nil, + TrimFrom: []string{}, + Expected: nil, + }, + }, + { + Name: "with nil trim list", + Args: trimArgs{ + Origin: []string{"a", "b"}, + TrimFrom: []string{}, + Expected: []string{"a", "b"}, + }, + }, + } +} diff --git a/reporthandling/resultshandling.go b/reporthandling/resultshandling.go index 5a50e3a9..b87f9751 100644 --- a/reporthandling/resultshandling.go +++ b/reporthandling/resultshandling.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/kubescape/opa-utils/objectsenvelopes" + "github.com/kubescape/opa-utils/reporthandling/internal/slices" "github.com/open-policy-agent/opa/rego" ) @@ -54,38 +55,20 @@ func GetUniqueResources(k8sResources []map[string]interface{}) []map[string]inte return k8sResources } -// GetUniqueResources the list of resources can contain duplications, this function removes the resource duplication based on workloadinterface.GetID +// GetUniqueResourcesIDs yields the list of unique resource IDs. Duplicates are removed, based on the workload.GetID() interface method. +// +// NOTE: the input slice is modified in-place. func GetUniqueResourcesIDs(k8sResourcesList []string) []string { - uniqueRuleResponses := map[string]bool{} - k8sResourcesNewList := []string{} - - for i := range k8sResourcesList { - if found := uniqueRuleResponses[k8sResourcesList[i]]; !found { - uniqueRuleResponses[k8sResourcesList[i]] = true - k8sResourcesNewList = append(k8sResourcesNewList, k8sResourcesList[i]) - } - } - return k8sResourcesNewList + return slices.UniqueStrings(k8sResourcesList) } -// TrimUniqueResources trim the list, this wil trim in case the same resource appears in the warning list and in the failed list +// TrimUniqueResources trims the origin list to contain only elements that are NOT already present in the trimFrom list. +// +// # This is used to cover the case when the same resource appears in the warning list and in the failed list +// +// NOTE: the origin slice is modified in-place. func TrimUniqueIDs(origin, trimFrom []string) []string { - if len(origin) == 0 || len(trimFrom) == 0 { // if there is nothing to trim - return origin - } - uniqueResources := map[string]bool{} - listResources := []string{} - - for i := range trimFrom { - uniqueResources[trimFrom[i]] = true - } - - for i := range origin { - if found := uniqueResources[origin[i]]; !found { - listResources = append(listResources, origin[i]) - } - } - return listResources + return slices.TrimStable(origin, trimFrom) } func removeFromSlice(k8sResources []map[string]interface{}, i int) []map[string]interface{} {