From b09584cf4962aca95b411dac476d83dedce46fd6 Mon Sep 17 00:00:00 2001
From: Aditya Thebe
Date: Thu, 23 May 2024 22:04:00 +0545
Subject: [PATCH] fix: parent id of Namespace & Cluster in incremental k8s scraper

In an incremental scraper, we don't have the namespace object. Hence,
resourceIDMap would be empty for all the namespaces.

If any namespaced object, like a Deployment, were to be scraped by the
incremental scraper, we wouldn't be able to set the parent of that
deployment because we don't know the id of the parent namespace.

In the workload cluster, the incremental scraper wasn't able to set the
parent for a newly created cronjob so the saving of CronJob config
failed. Consequently, saving of the job and also the pod would fail.
This would happen until a full scrape.
---
 db/models/config_item.go          |  5 +++--
 db/update.go                      |  4 ++++
 scrapers/kubernetes/kubernetes.go | 22 ++++++++++++++++------
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/db/models/config_item.go b/db/models/config_item.go
index 2b31d5bf2..a0d7a7651 100644
--- a/db/models/config_item.go
+++ b/db/models/config_item.go
@@ -10,6 +10,7 @@ import (
 	"github.com/flanksource/duty/types"
 	"github.com/google/uuid"
 	"github.com/lib/pq"
+	"github.com/samber/lo"
 )
 
 // ConfigItem represents the config item database table
@@ -48,10 +49,10 @@ type ConfigItem struct {
 
 func (ci ConfigItem) String() string {
 	if len(ci.ExternalID) == 0 {
-		return fmt.Sprintf("id=%s name=%s type=%s", ci.ID, *ci.Type, *ci.Name)
+		return fmt.Sprintf("id=%s type=%s name=%s ", ci.ID, lo.FromPtr(ci.Type), lo.FromPtr(ci.Name))
 	}
 
-	return fmt.Sprintf("id=%s name=%s type=%s external_id=%s", ci.ID, *ci.Type, *ci.Name, ci.ExternalID[0])
+	return fmt.Sprintf("id=%s type=%s name=%s external_id=%s", ci.ID, lo.FromPtr(ci.Type), lo.FromPtr(ci.Name), ci.ExternalID[0])
 }
 
 func (ci ConfigItem) ConfigJSONStringMap() (map[string]interface{}, error) {
diff --git a/db/update.go b/db/update.go
index 65fedb044..ee8ca64ea 100644
--- a/db/update.go
+++ b/db/update.go
@@ -711,6 +711,10 @@ func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime
 
 func setConfigParents(ctx api.ScrapeContext, parentTypeToConfigMap map[configExternalKey]string, allConfigs []*models.ConfigItem) error {
 	for _, ci := range allConfigs {
+		if ci.ParentID != nil {
+			continue // existing item. Parent is already set.
+		}
+
 		if ci.ParentExternalID == "" || ci.ParentType == "" {
 			continue
 		}
diff --git a/scrapers/kubernetes/kubernetes.go b/scrapers/kubernetes/kubernetes.go
index 2dfcfef78..719c6a7fa 100644
--- a/scrapers/kubernetes/kubernetes.go
+++ b/scrapers/kubernetes/kubernetes.go
@@ -421,7 +421,7 @@ func ExtractResults(ctx context.Context, config v1.Kubernetes, objs []*unstructu
 			return results.Errorf(err, "failed to clean kubernetes object")
 		}
 
-		parentType, parentExternalID := getKubernetesParent(obj, config.Exclusions, resourceIDMap)
+		parentType, parentExternalID := getKubernetesParent(obj, config, resourceIDMap)
 		results = append(results, v1.ScrapeResult{
 			BaseScraper:         config.BaseScraper,
 			Name:                obj.GetName(),
@@ -439,7 +439,7 @@ func ExtractResults(ctx context.Context, config v1.Kubernetes, objs []*unstructu
 			ID:                  string(obj.GetUID()),
 			Labels:              stripLabels(labels, "-hash"),
 			Tags:                tags,
-			Aliases:             getKubernetesAlias(obj),
+			Aliases:             []string{getKubernetesAlias(obj.GetKind(), obj.GetNamespace(), obj.GetName())},
 			ParentExternalID:    parentExternalID,
 			ParentType:          ConfigTypePrefix + parentType,
 			RelationshipResults: relationships,
@@ -466,14 +466,14 @@ func ExtractResults(ctx context.Context, config v1.Kubernetes, objs []*unstructu
 	return results
 }
 
-func getKubernetesParent(obj *unstructured.Unstructured, exclusions v1.KubernetesExclusionConfig, resourceIDMap map[string]map[string]map[string]string) (string, string) {
+func getKubernetesParent(obj *unstructured.Unstructured, config v1.Kubernetes, resourceIDMap map[string]map[string]map[string]string) (string, string) {
 	var parentExternalID, parentConfigType string
 
 	// This will work for pods and replicasets
 	if len(obj.GetOwnerReferences()) > 0 {
 		ref := obj.GetOwnerReferences()[0]
 
-		if obj.GetKind() == "Pod" && lo.Contains(exclusions.Kinds, "ReplicaSet") {
+		if obj.GetKind() == "Pod" && lo.Contains(config.Exclusions.Kinds, "ReplicaSet") {
 			// If ReplicaSet is excluded then we want the pod's direct parent to
 			// be its Deployment
 			if ref.Kind == "ReplicaSet" {
@@ -492,17 +492,27 @@ func getKubernetesParent(obj *unstructured.Unstructured, exclusions v1.Kubernete
 	if obj.GetNamespace() != "" {
 		parentConfigType = "Namespace"
 		parentExternalID = resourceIDMap[""]["Namespace"][obj.GetNamespace()]
+
+		if obj.GetKind() == "CronJob" {
+			_ = obj
+		}
+
+		if parentExternalID == "" {
+			parentExternalID = getKubernetesAlias("Namespace", "", obj.GetNamespace())
+		}
+
 		return parentConfigType, parentExternalID
 	}
 
 	// Everything which is not namespaced should be mapped to cluster
 	parentConfigType = "Cluster"
 	parentExternalID = resourceIDMap[""]["Cluster"]["selfRef"]
+
 	return parentConfigType, parentExternalID
 }
 
-func getKubernetesAlias(obj *unstructured.Unstructured) []string {
-	return []string{strings.Join([]string{"Kubernetes", obj.GetKind(), obj.GetNamespace(), obj.GetName()}, "/")}
+func getKubernetesAlias(kind, namespace, name string) string {
+	return strings.Join([]string{"Kubernetes", kind, namespace, name}, "/")
 }
 
 func updateOptions(ctx context.Context, opts *options.KetallOptions, config v1.Kubernetes) (*options.KetallOptions, error) {