From b61f406bf2194becc36f80d68fae9a9229b57cca Mon Sep 17 00:00:00 2001 From: Aditya Thebe Date: Wed, 29 May 2024 20:07:22 +0545 Subject: [PATCH] feat: remove the need to manually set the root. we use a dummy root. --- db/update.go | 127 +++++++++++++++++++++-------------------- scrapers/cron.go | 4 +- scrapers/run.go | 12 +---- 3 files changed, 59 insertions(+), 84 deletions(-) diff --git a/db/update.go b/db/update.go index 524ce5f3a..07fe86af3 100644 --- a/db/update.go +++ b/db/update.go @@ -313,12 +313,21 @@ func UpdateAnalysisStatusBefore(ctx api.ScrapeContext, before time.Time, scraper // SaveResults creates or update a configuration with config changes func SaveResults(ctx api.ScrapeContext, results []v1.ScrapeResult) error { + return saveResults(ctx, false, results) +} + +// SavePartialResults creates or updates a configuration with config changes +func SavePartialResults(ctx api.ScrapeContext, results []v1.ScrapeResult) error { + return saveResults(ctx, true, results) +} + +func saveResults(ctx api.ScrapeContext, isPartialResultSet bool, results []v1.ScrapeResult) error { startTime, err := GetCurrentDBTime(ctx) if err != nil { return fmt.Errorf("unable to get current db time: %w", err) } - newConfigs, configsToUpdate, newChanges, changesToUpdate, err := extractConfigsAndChangesFromResults(ctx, startTime, results) + newConfigs, configsToUpdate, newChanges, changesToUpdate, err := extractConfigsAndChangesFromResults(ctx, startTime, isPartialResultSet, results) if err != nil { return fmt.Errorf("failed to extract configs & changes from results: %w", err) } @@ -593,7 +602,7 @@ type configExternalKey struct { parentType string } -func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime time.Time, results []v1.ScrapeResult) ([]*models.ConfigItem, []*updateConfigArgs, []*models.ConfigChange, []*models.ConfigChange, error) { +func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime time.Time, isPartialResultSet bool, 
results []v1.ScrapeResult) ([]*models.ConfigItem, []*updateConfigArgs, []*models.ConfigChange, []*models.ConfigChange, error) { var ( newConfigs = make([]*models.ConfigItem, 0, len(results)) configsToUpdate = make([]*updateConfigArgs, 0, len(results)) @@ -601,7 +610,6 @@ func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime newChanges = make([]*models.ConfigChange, 0, len(results)) changesToUpdate = make([]*models.ConfigChange, 0, len(results)) - root string tree = graph.New(graph.StringHash, graph.Rooted(), graph.PreventCycles()) allConfigs = make([]*models.ConfigItem, 0, len(results)) @@ -627,10 +635,6 @@ func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime return nil, nil, nil, nil, fmt.Errorf("config item %s has no external id", ci) } - if isTreeRoot(lo.FromPtr(ci.Type)) { - root = ci.ID - } - parentExternalKey := configExternalKey{externalID: ci.ExternalID[0], parentType: lo.FromPtr(ci.Type)} parentTypeToConfigMap[parentExternalKey] = ci.ID @@ -681,11 +685,11 @@ func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime // This is because, on the first run, we don't have any configs at all in the DB. // So, all the parent lookups will return empty result and no parent will be set. // This way, we can first look for the parents within the result set. 
- if err := setConfigParents(ctx, root, parentTypeToConfigMap, allConfigs); err != nil { + if err := setConfigParents(ctx, parentTypeToConfigMap, allConfigs); err != nil { return nil, nil, nil, nil, fmt.Errorf("unable to setup parents: %w", err) } - if err := setConfigPaths(ctx, tree, root, allConfigs); err != nil { + if err := setConfigPaths(ctx, tree, isPartialResultSet, allConfigs); err != nil { return nil, nil, nil, nil, fmt.Errorf("unable to set config paths: %w", err) } @@ -706,21 +710,14 @@ func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime return newConfigs, configsToUpdate, newChanges, changesToUpdate, nil } -func setConfigParents(ctx api.ScrapeContext, root string, parentTypeToConfigMap map[configExternalKey]string, allConfigs []*models.ConfigItem) error { +func setConfigParents(ctx api.ScrapeContext, parentTypeToConfigMap map[configExternalKey]string, allConfigs []*models.ConfigItem) error { for _, ci := range allConfigs { if ci.ParentID != nil { continue // existing item. Parent is already set. 
} if ci.ParentExternalID == "" || ci.ParentType == "" { - if ci.ID == root || ctx.ScrapeConfig().IsCustom() { - continue - } else if lo.FromPtr(ci.Type) == v1.AWSRegion { - // Regions don't have parents but they are also not the root of the aws scraped resources - continue - } else { - return fmt.Errorf("config %s has no parent", ci) - } + continue } if parentID, found := parentTypeToConfigMap[configExternalKey{ @@ -743,7 +740,7 @@ func setConfigParents(ctx api.ScrapeContext, root string, parentTypeToConfigMap return nil } -func generatePartialTree(ctx api.ScrapeContext, tree graph.Graph[string, string], allConfigs []*models.ConfigItem) (string, error) { +func generatePartialTree(ctx api.ScrapeContext, tree graph.Graph[string, string], root string, allConfigs []*models.ConfigItem) error { // When we have a partial result set from an incremental scraper // we try to form a partial tree that's just sufficient to // connect the config item back to the root. @@ -759,13 +756,12 @@ func generatePartialTree(ctx api.ScrapeContext, tree graph.Graph[string, string] configIDs[c.ID] = struct{}{} } - var root string for _, c := range allConfigs { if c.ParentID == nil { // We aren't supposed to hit this point, except when an incremental scraper runs before a full scrape // // We fail early here than failing on db insert. 
- return "", fmt.Errorf("a non root config found without a parent %s", c) + return fmt.Errorf("a non root config found without a parent %s", c) } if _, found := configIDs[*c.ParentID]; found { @@ -774,55 +770,56 @@ func generatePartialTree(ctx api.ScrapeContext, tree graph.Graph[string, string] parent, err := ctx.TempCache().Get(*c.ParentID) if err != nil { - return "", fmt.Errorf("unable to get parent(%s): %w", c, err) + return fmt.Errorf("unable to get parent(%s): %w", c, err) } - if parent.Path == "" { - if err := tree.AddVertex(parent.ID); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) { - return "", fmt.Errorf("unable to add vertex(%s): %w", parent, err) + nodes := strings.Split(parent.Path, ".") + for i, n := range nodes { + if err := tree.AddVertex(n); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) { + return fmt.Errorf("unable to add vertex(%s): %w", n, err) } - } else { - nodes := strings.Split(parent.Path, ".") - for i, n := range nodes { - if err := tree.AddVertex(n); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) { - return "", fmt.Errorf("unable to add vertex(%s): %w", n, err) - } - if i != 0 { - if err := tree.AddEdge(nodes[i-1], n); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) { - return "", fmt.Errorf("unable to add edge(%s,%s): %w", nodes[i-1], n, err) - } + if i == 0 { + if err := tree.AddEdge(root, n); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) { + return fmt.Errorf("unable to add edge(%s,%s): %w", root, n, err) + } + } else { + if err := tree.AddEdge(nodes[i-1], n); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) { + return fmt.Errorf("unable to add edge(%s,%s): %w", nodes[i-1], n, err) } - } - - if root == "" { - // If there's a path then we know for sure the first node is the root - root = nodes[0] } } } - return root, nil + return nil } -func setConfigPaths(ctx api.ScrapeContext, tree graph.Graph[string, string], root string, allConfigs []*models.ConfigItem) error { 
- var err error - - if root == "" { - if !ctx.ScrapeConfig().IsCustom() { - root, err = generatePartialTree(ctx, tree, allConfigs) - if err != nil { - return err - } - } else { - if err := tree.AddVertex(root); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) { - return fmt.Errorf("unable to add vertex(%s): %w", root, err) - } +func setConfigPaths(ctx api.ScrapeContext, tree graph.Graph[string, string], isPartialResultSet bool, allConfigs []*models.ConfigItem) error { + // Add a virtual vertex which will be the root for the graph. + // We do this so + // 1. we don't need to define what's root for each scraper manually + // 2. the virtual root can act as the root for all the config items + // that do not have a parent. example: for aws scraper + // all the aws regions & the aws account config does not have a parent. + // This solves the problem where a results set (example: aws scraper) + // has multiple roots. + var virtualRoot = "dummy" + if err := tree.AddVertex(virtualRoot); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) { + return fmt.Errorf("unable to add vertex(%s): %w", virtualRoot, err) + } + + if isPartialResultSet { + if err := generatePartialTree(ctx, tree, virtualRoot, allConfigs); err != nil { + return err + } } } for _, c := range allConfigs { - if c.ParentID != nil { + if c.ParentID == nil || lo.FromPtr(c.ParentID) == "" { + if err := tree.AddEdge(virtualRoot, c.ID); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) { + return fmt.Errorf("unable to add edge(%s -> %s): %w", virtualRoot, c.ID, err) + } + } else { if err := tree.AddEdge(*c.ParentID, c.ID); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) { return fmt.Errorf("unable to add edge(%s -> %s): %w", *c.ParentID, c.ID, err) } @@ -830,11 +827,6 @@ func setConfigPaths(ctx api.ScrapeContext, tree graph.Graph[string, string], roo } for _, c := range allConfigs { - if c.ParentID == nil { - // Custom scrapers can generate configs without a parent. 
- continue - } - // resources from the aws scraper are an anomaly because they form a // graph of multiple roots (account & all the regions). // In particular, the availability zone & availability zone id cannot @@ -846,21 +838,14 @@ func setConfigPaths(ctx api.ScrapeContext, tree graph.Graph[string, string], roo continue } - if paths, err := graph.ShortestPath(tree, root, c.ID); err != nil { + if paths, err := graph.ShortestPath(tree, virtualRoot, c.ID); err != nil { ctx.Logger.V(1).Infof("unable to get the path for config(%s): %v", c, err) - } else if len(paths) > 0 { + } else { + // remove the virtual root + paths = paths[1:] c.Path = strings.Join(paths, ".") } } return nil } - -func isTreeRoot(configType string) bool { - switch configType { - case "AWS::::Account", "Kubernetes::Cluster", "Azure::Subscription": - return true - } - - return false -} diff --git a/scrapers/cron.go b/scrapers/cron.go index bdc139bc2..85803f1d7 100644 --- a/scrapers/cron.go +++ b/scrapers/cron.go @@ -226,7 +226,7 @@ func ConsumeKubernetesWatchEventsJobFunc(sc api.ScrapeContext, config v1.Kuberne return err } - if err := SaveResults(cc, results); err != nil { + if err := db.SavePartialResults(cc, results); err != nil { return fmt.Errorf("failed to save results: %w", err) } @@ -303,7 +303,7 @@ func ConsumeKubernetesWatchResourcesJobFunc(sc api.ScrapeContext, config v1.Kube return err } - if err := SaveResults(cc, results); err != nil { + if err := db.SavePartialResults(cc, results); err != nil { return fmt.Errorf("failed to save %d results: %w", len(results), err) } diff --git a/scrapers/run.go b/scrapers/run.go index ab127cf68..e42e09458 100644 --- a/scrapers/run.go +++ b/scrapers/run.go @@ -29,7 +29,7 @@ func RunScraper(ctx api.ScrapeContext) (v1.ScrapeResults, error) { return nil, fmt.Errorf("failed to run scraper %v: %w", ctx.ScrapeConfig().Name, scraperErr) } - if err := SaveResults(ctx, results); err != nil { + if err := db.SaveResults(ctx, results); err != nil { return nil, 
fmt.Errorf("failed to save results: %w", err) } @@ -41,16 +41,6 @@ func RunScraper(ctx api.ScrapeContext) (v1.ScrapeResults, error) { return results, nil } -func SaveResults(ctx api.ScrapeContext, results v1.ScrapeResults) error { - dbErr := db.SaveResults(ctx, results) - if dbErr != nil { - //FIXME cache results to save to db later - return dbErr - } - - return nil -} - func UpdateStaleConfigItems(ctx api.ScrapeContext, results v1.ScrapeResults) error { persistedID := ctx.ScrapeConfig().GetPersistedID() if persistedID != nil {