From 697f4471305da0a99ac63e16f6f2a44bb0f83ae3 Mon Sep 17 00:00:00 2001 From: Aditya Thebe Date: Mon, 15 Jul 2024 18:43:32 +0545 Subject: [PATCH 1/2] feat: cache resource id map [skip ci] --- api/cache.go | 2 +- api/v1/interface.go | 18 +++++++ scrapers/kubernetes/kubernetes.go | 76 +++++++++++++------------- scrapers/kubernetes/resource_map.go | 83 +++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 39 deletions(-) create mode 100644 scrapers/kubernetes/resource_map.go diff --git a/api/cache.go b/api/cache.go index ea42b38c..7ab31410 100644 --- a/api/cache.go +++ b/api/cache.go @@ -46,7 +46,7 @@ func (t *TempCache) Find(ctx ScrapeContext, lookup v1.ExternalID) (*models.Confi } var result models.ConfigItem - query := ctx.DB().Limit(1).Where("deleted_at IS NULL").Where("type = ? and external_id @> ?", lookup.ConfigType, pq.StringArray{externalID}) + query := ctx.DB().Limit(1).Order("updated_at DESC").Where("deleted_at IS NULL").Where("type = ? and external_id @> ?", lookup.ConfigType, pq.StringArray{externalID}) if scraperID != "all" && scraperID != "" { query = query.Where("scraper_id = ?", scraperID) } diff --git a/api/v1/interface.go b/api/v1/interface.go index 9e65a302..fc451994 100644 --- a/api/v1/interface.go +++ b/api/v1/interface.go @@ -269,6 +269,24 @@ type RelationshipResult struct { Relationship string } +func (t RelationshipResult) WithConfig(id string, ext ExternalID) RelationshipResult { + if id != "" { + t.ConfigID = id + } else { + t.ConfigExternalID = ext + } + return t +} + +func (t RelationshipResult) WithRelated(id string, ext ExternalID) RelationshipResult { + if id != "" { + t.RelatedConfigID = id + } else { + t.RelatedExternalID = ext + } + return t +} + func (r RelationshipResult) String() string { s := "" if r.ConfigID != "" { diff --git a/scrapers/kubernetes/kubernetes.go b/scrapers/kubernetes/kubernetes.go index 4106131c..10652d95 100644 --- a/scrapers/kubernetes/kubernetes.go +++ b/scrapers/kubernetes/kubernetes.go @@ -7,6 +7,7 @@ import ( "regexp" "strconv" "strings" + "sync" "time" "github.com/Jeffail/gabs/v2" @@ -35,6 +36,10 @@ import ( const ConfigTypePrefix = "Kubernetes::" +// TODO: this lock should be per scraper(cluster) +var allClustersResourceIDMap = map[string]ResourceIDMap{} +var clusterResourceIDLock sync.Mutex + // ReservedAnnotations const ( // AnnotationIgnoreConfig excludes the object from being scraped @@ -254,9 +259,15 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc } resourceIDMap := getResourceIDsFromObjs(objs) - resourceIDMap[""]["Cluster"] = make(map[string]string) - resourceIDMap[""]["Cluster"][config.ClusterName] = clusterID - resourceIDMap[""]["Cluster"]["selfRef"] = clusterID // For shorthand + resourceIDMap.Set("", "Cluster", config.ClusterName, clusterID) + resourceIDMap.Set("", "Cluster", "selfRef", clusterID) // For shorthand + + // For incremental scrape, we do not have all the data in the resource ID map + // we use it from the cached resource id map + clusterResourceIDLock.Lock() + resourceIDMap = mergeResourceIDMap(resourceIDMap, allClustersResourceIDMap[string(ctx.ScrapeConfig().GetUID())]) + allClustersResourceIDMap[string(ctx.ScrapeConfig().GetUID())] = resourceIDMap + clusterResourceIDLock.Unlock() objChangeExclusionByType, objChangeExclusionBySeverity := formObjChangeExclusionMap(objs) @@ -396,10 +407,12 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc nodeExternalID := lo.CoalesceOrEmpty(nodeID, getKubernetesAlias("Node", "", nodeName)) relationships = append(relationships, v1.RelationshipResult{ - ConfigExternalID: v1.ExternalID{ExternalID: []string{nodeExternalID}, ConfigType: ConfigTypePrefix + "Node"}, - RelatedExternalID: v1.ExternalID{ExternalID: []string{string(obj.GetUID())}, ConfigType: ConfigTypePrefix + "Pod"}, - Relationship: "NodePod", - }) + RelatedConfigID: string(obj.GetUID()), + Relationship: "NodePod", + }.WithConfig( + resourceIDMap.Get("", "Node", nodeName), + v1.ExternalID{ExternalID: []string{nodeExternalID}, ConfigType: ConfigTypePrefix + "Node"}, + )) } if obj.GetLabels()["app.kubernetes.io/name"] == "aws-node" { @@ -437,18 +450,22 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc if obj.GetNamespace() != "" { relationships = append(relationships, v1.RelationshipResult{ - ConfigExternalID: v1.ExternalID{ExternalID: []string{fmt.Sprintf("Kubernetes/Namespace//%s", obj.GetNamespace())}, ConfigType: ConfigTypePrefix + "Namespace"}, - RelatedExternalID: v1.ExternalID{ExternalID: []string{string(obj.GetUID())}, ConfigType: getConfigTypePrefix(obj.GetAPIVersion()) + obj.GetKind()}, - Relationship: "Namespace" + obj.GetKind(), - }) + RelatedConfigID: string(obj.GetUID()), + Relationship: "Namespace" + obj.GetKind(), + }.WithConfig( + resourceIDMap.Get("", "Namespace", obj.GetNamespace()), + v1.ExternalID{ExternalID: []string{getKubernetesAlias("Namespace", "", obj.GetNamespace())}, ConfigType: ConfigTypePrefix + "Namespace"}, + )) } for _, ownerRef := range obj.GetOwnerReferences() { relationships = append(relationships, v1.RelationshipResult{ - ConfigExternalID: v1.ExternalID{ExternalID: []string{string(ownerRef.UID)}, ConfigType: getConfigTypePrefix(ownerRef.APIVersion) + ownerRef.Kind}, - RelatedExternalID: v1.ExternalID{ExternalID: []string{string(obj.GetUID())}, ConfigType: getConfigTypePrefix(obj.GetAPIVersion()) + obj.GetKind()}, - Relationship: ownerRef.Kind + obj.GetKind(), - }) + RelatedConfigID: string(obj.GetUID()), + Relationship: ownerRef.Kind + obj.GetKind(), + }.WithConfig( + resourceIDMap.Get(obj.GetNamespace(), ownerRef.Kind, ownerRef.Name), + v1.ExternalID{ExternalID: []string{string(ownerRef.UID)}, ConfigType: getConfigTypePrefix(ownerRef.APIVersion) + ownerRef.Kind}, + )) } for _, f := range config.Relationships { @@ -471,9 +488,12 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc for _, id := range linkedConfigItemIDs { rel := v1.RelationshipResult{ - RelatedExternalID: v1.ExternalID{ExternalID: []string{string(obj.GetUID())}, ConfigType: getConfigTypePrefix(obj.GetKind()) + obj.GetKind()}, - ConfigID: id.String(), - } + ConfigID: id.String(), + }.WithRelated( + resourceIDMap.Get(obj.GetNamespace(), obj.GetKind(), obj.GetName()), + v1.ExternalID{ExternalID: []string{string(obj.GetUID())}, ConfigType: getConfigTypePrefix(obj.GetAPIVersion()) + obj.GetKind()}, + ) + relationships = append(relationships, rel) } } @@ -720,26 +740,6 @@ func extractDeployNameFromReplicaSet(rs string) string { return strings.Join(split, "-") } -func getResourceIDsFromObjs(objs []*unstructured.Unstructured) map[string]map[string]map[string]string { - // {Namespace: {Kind: {Name: ID}}} - resourceIDMap := make(map[string]map[string]map[string]string) - resourceIDMap[""] = make(map[string]map[string]string) - - for _, obj := range objs { - if collections.Contains([]string{"Namespace", "Deployment", "Node"}, obj.GetKind()) { - if resourceIDMap[obj.GetNamespace()] == nil { - resourceIDMap[obj.GetNamespace()] = make(map[string]map[string]string) - } - if resourceIDMap[obj.GetNamespace()][obj.GetKind()] == nil { - resourceIDMap[obj.GetNamespace()][obj.GetKind()] = make(map[string]string) - } - resourceIDMap[obj.GetNamespace()][obj.GetKind()][obj.GetName()] = string(obj.GetUID()) - } - } - - return resourceIDMap -} - //nolint:errcheck func cleanKubernetesObject(obj map[string]any) (map[string]any, error) { o := gabs.Wrap(obj) diff --git a/scrapers/kubernetes/resource_map.go b/scrapers/kubernetes/resource_map.go new file mode 100644 index 00000000..7c77b0cb --- /dev/null +++ b/scrapers/kubernetes/resource_map.go @@ -0,0 +1,83 @@ +package kubernetes + +import "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + +// map: +type ResourceIDMap map[string]map[string]map[string]string + +func (t ResourceIDMap) Set(namespace, kind, name, id string) { + if t[namespace] == nil { + t[namespace] = make(map[string]map[string]string) + } + if t[namespace][kind] == nil { + t[namespace][kind] = make(map[string]string) + } + t[namespace][kind][name] = id +} + +func (t ResourceIDMap) Get(namespace, kind, name string) string { + if kinds, ok := t[namespace]; ok { + if names, ok := kinds[kind]; ok { + if id, ok := names[name]; ok { + return id + } + } + } + + return "" +} + +func getResourceIDsFromObjs(objs []*unstructured.Unstructured) ResourceIDMap { + resourceIDMap := make(map[string]map[string]map[string]string) + for _, obj := range objs { + if resourceIDMap[obj.GetNamespace()] == nil { + resourceIDMap[obj.GetNamespace()] = make(map[string]map[string]string) + } + if resourceIDMap[obj.GetNamespace()][obj.GetKind()] == nil { + resourceIDMap[obj.GetNamespace()][obj.GetKind()] = make(map[string]string) + } + resourceIDMap[obj.GetNamespace()][obj.GetKind()][obj.GetName()] = string(obj.GetUID()) + } + + return resourceIDMap +} + +func mergeResourceIDMap(latest, cached ResourceIDMap) ResourceIDMap { + if len(latest) == 0 { + return cached + } + + if len(cached) == 0 { + return latest + } + + output := make(ResourceIDMap) + + // First, copy all data from cached + for k, v := range cached { + output[k] = make(map[string]map[string]string) + for k2, v2 := range v { + output[k][k2] = make(map[string]string) + for k3, v3 := range v2 { + output[k][k2][k3] = v3 + } + } + } + + // Then, update or add data from latest + for k, v := range latest { + if _, ok := output[k]; !ok { + output[k] = make(map[string]map[string]string) + } + for k2, v2 := range v { + if _, ok := output[k][k2]; !ok { + output[k][k2] = make(map[string]string) + } + for k3, v3 := range v2 { + output[k][k2][k3] = v3 + } + } + } + + return output +} From 92592da32b6737402e5a412820914fd2eb815ff8 Mon Sep 17 00:00:00 2001 From: Aditya Thebe Date: Mon, 15 Jul 2024 21:16:29 +0545 Subject: [PATCH 2/2] locks on resource maps --- scrapers/kubernetes/kubernetes.go | 40 +++++++--------- scrapers/kubernetes/resource_map.go | 73 ++++++++++++++++++++++++----- 2 files changed, 80 insertions(+), 33 deletions(-) diff --git a/scrapers/kubernetes/kubernetes.go b/scrapers/kubernetes/kubernetes.go index 10652d95..d9b2cc7e 100644 --- a/scrapers/kubernetes/kubernetes.go +++ b/scrapers/kubernetes/kubernetes.go @@ -7,7 +7,6 @@ import ( "regexp" "strconv" "strings" - "sync" "time" "github.com/Jeffail/gabs/v2" @@ -36,9 +35,7 @@ import ( const ConfigTypePrefix = "Kubernetes::" -// TODO: this lock should be per scraper(cluster) -var allClustersResourceIDMap = map[string]ResourceIDMap{} -var clusterResourceIDLock sync.Mutex +var resourceIDMapPerCluster PerClusterResourceIDMap // ReservedAnnotations const ( @@ -231,7 +228,7 @@ func getObjectChangeExclusionAnnotations(ctx api.ScrapeContext, id string, exclu // ExtractResults extracts scrape results from the given list of kuberenetes objects. // - withCluster: if true, will create & add a scrape result for the kubernetes cluster. -func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstructured.Unstructured, withCluster bool) v1.ScrapeResults { +func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstructured.Unstructured, isIncrementalScrape bool) v1.ScrapeResults { var ( results v1.ScrapeResults changeResults v1.ScrapeResults @@ -258,16 +255,17 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc Tags: v1.Tags{{Name: "cluster", Value: config.ClusterName}}, } - resourceIDMap := getResourceIDsFromObjs(objs) + resourceIDMap := NewResourceIDMap(objs) resourceIDMap.Set("", "Cluster", config.ClusterName, clusterID) resourceIDMap.Set("", "Cluster", "selfRef", clusterID) // For shorthand - // For incremental scrape, we do not have all the data in the resource ID map - // we use it from the cached resource id map - clusterResourceIDLock.Lock() - resourceIDMap = mergeResourceIDMap(resourceIDMap, allClustersResourceIDMap[string(ctx.ScrapeConfig().GetUID())]) - allClustersResourceIDMap[string(ctx.ScrapeConfig().GetUID())] = resourceIDMap - clusterResourceIDLock.Unlock() + if isIncrementalScrape { + // On incremental scrape, we do not have all the data in the resource ID map. + // we use it from the cached resource id map. + resourceIDMap.data = resourceIDMapPerCluster.MergeAndUpdate(string(ctx.ScrapeConfig().GetUID()), resourceIDMap.data) + } else { + resourceIDMapPerCluster.Swap(string(ctx.ScrapeConfig().GetUID()), resourceIDMap.data) + } objChangeExclusionByType, objChangeExclusionBySeverity := formObjChangeExclusionMap(objs) @@ -403,7 +401,7 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc var nodeName string if spec["nodeName"] != nil { nodeName = spec["nodeName"].(string) - nodeID := resourceIDMap[""]["Node"][nodeName] + nodeID := resourceIDMap.Get("", "Node", nodeName) nodeExternalID := lo.CoalesceOrEmpty(nodeID, getKubernetesAlias("Node", "", nodeName)) relationships = append(relationships, v1.RelationshipResult{ @@ -460,12 +458,10 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc for _, ownerRef := range obj.GetOwnerReferences() { relationships = append(relationships, v1.RelationshipResult{ + ConfigID: string(ownerRef.UID), RelatedConfigID: string(obj.GetUID()), Relationship: ownerRef.Kind + obj.GetKind(), - }.WithConfig( - resourceIDMap.Get(obj.GetNamespace(), ownerRef.Kind, ownerRef.Name), - v1.ExternalID{ExternalID: []string{string(ownerRef.UID)}, ConfigType: getConfigTypePrefix(ownerRef.APIVersion) + ownerRef.Kind}, - )) + }) } for _, f := range config.Relationships { @@ -612,7 +608,7 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc } results = append(results, changeResults...) - if withCluster { + if isIncrementalScrape { results = append([]v1.ScrapeResult{cluster}, results...) } @@ -633,15 +629,15 @@ func ExtractResults(ctx api.ScrapeContext, config v1.Kubernetes, objs []*unstruc // getKubernetesParent returns a list of potential parents in order. // Example: For a Pod the parents would be [Replicaset, Namespace, Cluster] -func getKubernetesParent(obj *unstructured.Unstructured, exclusions v1.KubernetesExclusionConfig, resourceIDMap map[string]map[string]map[string]string) []v1.ConfigExternalKey { +func getKubernetesParent(obj *unstructured.Unstructured, exclusions v1.KubernetesExclusionConfig, resourceIDMap *ResourceIDMapContainer) []v1.ConfigExternalKey { var allParents []v1.ConfigExternalKey allParents = append(allParents, v1.ConfigExternalKey{ Type: ConfigTypePrefix + "Cluster", - ExternalID: resourceIDMap[""]["Cluster"]["selfRef"], + ExternalID: resourceIDMap.Get("", "Cluster", "selfRef"), }) if obj.GetNamespace() != "" { - parentExternalID := resourceIDMap[""]["Namespace"][obj.GetNamespace()] + parentExternalID := resourceIDMap.Get("", "Namespace", obj.GetNamespace()) if parentExternalID == "" { // An incremental scraper may not have the Namespace object. // We can instead use the alias as the external id. @@ -661,7 +657,7 @@ func getKubernetesParent(obj *unstructured.Unstructured, exclusions v1.Kubernete // be its Deployment if obj.GetKind() == "Pod" && lo.Contains(exclusions.Kinds, "ReplicaSet") && ref.Kind == "ReplicaSet" { deployName := extractDeployNameFromReplicaSet(ref.Name) - parentExternalID := resourceIDMap[obj.GetNamespace()]["Deployment"][deployName] + parentExternalID := resourceIDMap.Get(obj.GetNamespace(), "Deployment", deployName) allParents = append([]v1.ConfigExternalKey{{ Type: ConfigTypePrefix + "Deployment", ExternalID: parentExternalID, diff --git a/scrapers/kubernetes/resource_map.go b/scrapers/kubernetes/resource_map.go index 7c77b0cb..38aa5b03 100644 --- a/scrapers/kubernetes/resource_map.go +++ b/scrapers/kubernetes/resource_map.go @@ -1,22 +1,37 @@ package kubernetes -import "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +import ( + "sync" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) // map: type ResourceIDMap map[string]map[string]map[string]string -func (t ResourceIDMap) Set(namespace, kind, name, id string) { - if t[namespace] == nil { - t[namespace] = make(map[string]map[string]string) +type ResourceIDMapContainer struct { + mu sync.RWMutex + data ResourceIDMap +} + +func (t *ResourceIDMapContainer) Set(namespace, kind, name, id string) { + t.mu.Lock() + defer t.mu.Unlock() + + if t.data[namespace] == nil { + t.data[namespace] = make(map[string]map[string]string) } - if t[namespace][kind] == nil { - t[namespace][kind] = make(map[string]string) + if t.data[namespace][kind] == nil { + t.data[namespace][kind] = make(map[string]string) } - t[namespace][kind][name] = id + t.data[namespace][kind][name] = id } -func (t ResourceIDMap) Get(namespace, kind, name string) string { - if kinds, ok := t[namespace]; ok { +func (t *ResourceIDMapContainer) Get(namespace, kind, name string) string { + t.mu.RLock() + defer t.mu.RUnlock() + + if kinds, ok := t.data[namespace]; ok { if names, ok := kinds[kind]; ok { if id, ok := names[name]; ok { return id @@ -27,7 +42,40 @@ func (t ResourceIDMap) Get(namespace, kind, name string) string { return "" } -func getResourceIDsFromObjs(objs []*unstructured.Unstructured) ResourceIDMap { +type PerClusterResourceIDMap struct { + mu sync.Mutex + data map[string]ResourceIDMap +} + +func (t *PerClusterResourceIDMap) Swap(clusterID string, resourceIDMap ResourceIDMap) { + t.mu.Lock() + defer t.mu.Unlock() + + if t.data == nil { + t.data = make(map[string]ResourceIDMap) + } + + t.data[clusterID] = resourceIDMap +} + +func (t *PerClusterResourceIDMap) MergeAndUpdate(clusterID string, resourceIDMap ResourceIDMap) ResourceIDMap { + t.mu.Lock() + defer t.mu.Unlock() + + cached, ok := t.data[clusterID] + if ok { + resourceIDMap = mergeResourceIDMap(resourceIDMap, cached) + } + + if t.data == nil { + t.data = make(map[string]ResourceIDMap) + } + + t.data[clusterID] = resourceIDMap + return resourceIDMap +} + +func NewResourceIDMap(objs []*unstructured.Unstructured) *ResourceIDMapContainer { resourceIDMap := make(map[string]map[string]map[string]string) for _, obj := range objs { if resourceIDMap[obj.GetNamespace()] == nil { @@ -39,7 +87,10 @@ func getResourceIDsFromObjs(objs []*unstructured.Unstructured) ResourceIDMap { resourceIDMap[obj.GetNamespace()][obj.GetKind()][obj.GetName()] = string(obj.GetUID()) } - return resourceIDMap + return &ResourceIDMapContainer{ + data: resourceIDMap, + mu: sync.RWMutex{}, + } } func mergeResourceIDMap(latest, cached ResourceIDMap) ResourceIDMap {