Skip to content

Commit

Permalink
feat: remove the need to manually set the root. we use a dummy root.
Browse files Browse the repository at this point in the history
  • Loading branch information
adityathebe committed May 29, 2024
1 parent d334820 commit b61f406
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 84 deletions.
127 changes: 56 additions & 71 deletions db/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,12 +313,21 @@ func UpdateAnalysisStatusBefore(ctx api.ScrapeContext, before time.Time, scraper

// SaveResults creates or updates a configuration with config changes
func SaveResults(ctx api.ScrapeContext, results []v1.ScrapeResult) error {
return saveResults(ctx, false, results)
}

// SavePartialResults creates or updates a configuration with config changes, treating the input as a partial result set
func SavePartialResults(ctx api.ScrapeContext, results []v1.ScrapeResult) error {
return saveResults(ctx, true, results)
}

func saveResults(ctx api.ScrapeContext, isPartialResultSet bool, results []v1.ScrapeResult) error {
startTime, err := GetCurrentDBTime(ctx)
if err != nil {
return fmt.Errorf("unable to get current db time: %w", err)
}

newConfigs, configsToUpdate, newChanges, changesToUpdate, err := extractConfigsAndChangesFromResults(ctx, startTime, results)
newConfigs, configsToUpdate, newChanges, changesToUpdate, err := extractConfigsAndChangesFromResults(ctx, startTime, isPartialResultSet, results)
if err != nil {
return fmt.Errorf("failed to extract configs & changes from results: %w", err)
}
Expand Down Expand Up @@ -593,15 +602,14 @@ type configExternalKey struct {
parentType string
}

func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime time.Time, results []v1.ScrapeResult) ([]*models.ConfigItem, []*updateConfigArgs, []*models.ConfigChange, []*models.ConfigChange, error) {
func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime time.Time, isPartialResultSet bool, results []v1.ScrapeResult) ([]*models.ConfigItem, []*updateConfigArgs, []*models.ConfigChange, []*models.ConfigChange, error) {
var (
newConfigs = make([]*models.ConfigItem, 0, len(results))
configsToUpdate = make([]*updateConfigArgs, 0, len(results))

newChanges = make([]*models.ConfigChange, 0, len(results))
changesToUpdate = make([]*models.ConfigChange, 0, len(results))

root string
tree = graph.New(graph.StringHash, graph.Rooted(), graph.PreventCycles())
allConfigs = make([]*models.ConfigItem, 0, len(results))

Expand All @@ -627,10 +635,6 @@ func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime
return nil, nil, nil, nil, fmt.Errorf("config item %s has no external id", ci)
}

if isTreeRoot(lo.FromPtr(ci.Type)) {
root = ci.ID
}

parentExternalKey := configExternalKey{externalID: ci.ExternalID[0], parentType: lo.FromPtr(ci.Type)}
parentTypeToConfigMap[parentExternalKey] = ci.ID

Expand Down Expand Up @@ -681,11 +685,11 @@ func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime
// This is because, on the first run, we don't have any configs at all in the DB.
// So, all the parent lookups will return empty result and no parent will be set.
// This way, we can first look for the parents within the result set.
if err := setConfigParents(ctx, root, parentTypeToConfigMap, allConfigs); err != nil {
if err := setConfigParents(ctx, parentTypeToConfigMap, allConfigs); err != nil {
return nil, nil, nil, nil, fmt.Errorf("unable to setup parents: %w", err)
}

if err := setConfigPaths(ctx, tree, root, allConfigs); err != nil {
if err := setConfigPaths(ctx, tree, isPartialResultSet, allConfigs); err != nil {
return nil, nil, nil, nil, fmt.Errorf("unable to set config paths: %w", err)
}

Expand All @@ -706,21 +710,14 @@ func extractConfigsAndChangesFromResults(ctx api.ScrapeContext, scrapeStartTime
return newConfigs, configsToUpdate, newChanges, changesToUpdate, nil
}

func setConfigParents(ctx api.ScrapeContext, root string, parentTypeToConfigMap map[configExternalKey]string, allConfigs []*models.ConfigItem) error {
func setConfigParents(ctx api.ScrapeContext, parentTypeToConfigMap map[configExternalKey]string, allConfigs []*models.ConfigItem) error {
for _, ci := range allConfigs {
if ci.ParentID != nil {
continue // existing item. Parent is already set.
}

if ci.ParentExternalID == "" || ci.ParentType == "" {
if ci.ID == root || ctx.ScrapeConfig().IsCustom() {
continue
} else if lo.FromPtr(ci.Type) == v1.AWSRegion {
// Regions don't have parents but they are also not the root of the aws scraped resources
continue
} else {
return fmt.Errorf("config %s has no parent", ci)
}
continue
}

if parentID, found := parentTypeToConfigMap[configExternalKey{
Expand All @@ -743,7 +740,7 @@ func setConfigParents(ctx api.ScrapeContext, root string, parentTypeToConfigMap
return nil
}

func generatePartialTree(ctx api.ScrapeContext, tree graph.Graph[string, string], allConfigs []*models.ConfigItem) (string, error) {
func generatePartialTree(ctx api.ScrapeContext, tree graph.Graph[string, string], root string, allConfigs []*models.ConfigItem) error {
// When we have a partial result set from an incremental scraper
// we try to form a partial tree that's just sufficient to
// connect the config item back to the root.
Expand All @@ -759,13 +756,12 @@ func generatePartialTree(ctx api.ScrapeContext, tree graph.Graph[string, string]
configIDs[c.ID] = struct{}{}
}

var root string
for _, c := range allConfigs {
if c.ParentID == nil {
// We aren't supposed to hit this point, except when an incremental scraper runs before a full scrape
//
// We fail early here rather than failing on db insert.
return "", fmt.Errorf("a non root config found without a parent %s", c)
return fmt.Errorf("a non root config found without a parent %s", c)
}

if _, found := configIDs[*c.ParentID]; found {
Expand All @@ -774,67 +770,63 @@ func generatePartialTree(ctx api.ScrapeContext, tree graph.Graph[string, string]

parent, err := ctx.TempCache().Get(*c.ParentID)
if err != nil {
return "", fmt.Errorf("unable to get parent(%s): %w", c, err)
return fmt.Errorf("unable to get parent(%s): %w", c, err)
}

if parent.Path == "" {
if err := tree.AddVertex(parent.ID); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) {
return "", fmt.Errorf("unable to add vertex(%s): %w", parent, err)
nodes := strings.Split(parent.Path, ".")
for i, n := range nodes {
if err := tree.AddVertex(n); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) {
return fmt.Errorf("unable to add vertex(%s): %w", n, err)
}
} else {
nodes := strings.Split(parent.Path, ".")
for i, n := range nodes {
if err := tree.AddVertex(n); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) {
return "", fmt.Errorf("unable to add vertex(%s): %w", n, err)
}

if i != 0 {
if err := tree.AddEdge(nodes[i-1], n); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) {
return "", fmt.Errorf("unable to add edge(%s,%s): %w", nodes[i-1], n, err)
}
if i == 0 {
if err := tree.AddEdge(root, n); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) {
return fmt.Errorf("unable to add edge(%s,%s): %w", root, n, err)
}
} else {
if err := tree.AddEdge(nodes[i-1], n); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) {
return fmt.Errorf("unable to add edge(%s,%s): %w", nodes[i-1], n, err)
}
}

if root == "" {
// If there's a path then we know for sure the first node is the root
root = nodes[0]
}
}
}

return root, nil
return nil
}

func setConfigPaths(ctx api.ScrapeContext, tree graph.Graph[string, string], root string, allConfigs []*models.ConfigItem) error {
var err error

if root == "" {
if !ctx.ScrapeConfig().IsCustom() {
root, err = generatePartialTree(ctx, tree, allConfigs)
if err != nil {
return err
}
} else {
if err := tree.AddVertex(root); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) {
return fmt.Errorf("unable to add vertex(%s): %w", root, err)
}
func setConfigPaths(ctx api.ScrapeContext, tree graph.Graph[string, string], isPartialResultSet bool, allConfigs []*models.ConfigItem) error {
// Add a virtual vertex which will be the root for the graph.
// We do this so
// 1. we don't need to define what's root for each scraper manually
// 2. the virtual root can act as the root for all the config items
// that do not have a parent. example: for aws scraper
// all the aws regions & the aws account config does not have a parent.
// This solves the problem where a result set (example: aws scraper)
// has multiple roots.
var virtualRoot = "dummy"
if err := tree.AddVertex(virtualRoot); err != nil && !errors.Is(err, graph.ErrVertexAlreadyExists) {
return fmt.Errorf("unable to add vertex(%s): %w", virtualRoot, err)
}

if isPartialResultSet {
if err := generatePartialTree(ctx, tree, virtualRoot, allConfigs); err != nil {
return err
}
}

for _, c := range allConfigs {
if c.ParentID != nil {
if c.ParentID == nil || lo.FromPtr(c.ParentID) == "" {
if err := tree.AddEdge(virtualRoot, c.ID); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) {
return fmt.Errorf("unable to add edge(%s -> %s): %w", virtualRoot, c.ID, err)
}
} else {
if err := tree.AddEdge(*c.ParentID, c.ID); err != nil && !errors.Is(err, graph.ErrEdgeAlreadyExists) {
return fmt.Errorf("unable to add edge(%s -> %s): %w", *c.ParentID, c.ID, err)
}
}
}

for _, c := range allConfigs {
if c.ParentID == nil {
// Custom scrapers can generate configs without a parent.
continue
}

// resources from the aws scraper are an anomaly because they form a
// graph of multiple roots (account & all the regions).
// In particular, the availability zone & availability zone id cannot
Expand All @@ -846,21 +838,14 @@ func setConfigPaths(ctx api.ScrapeContext, tree graph.Graph[string, string], roo
continue
}

if paths, err := graph.ShortestPath(tree, root, c.ID); err != nil {
if paths, err := graph.ShortestPath(tree, virtualRoot, c.ID); err != nil {
ctx.Logger.V(1).Infof("unable to get the path for config(%s): %v", c, err)
} else if len(paths) > 0 {
} else {
// remove the virtual root
paths = paths[1:]
c.Path = strings.Join(paths, ".")
}
}

return nil
}

func isTreeRoot(configType string) bool {
switch configType {
case "AWS::::Account", "Kubernetes::Cluster", "Azure::Subscription":
return true
}

return false
}
4 changes: 2 additions & 2 deletions scrapers/cron.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ func ConsumeKubernetesWatchEventsJobFunc(sc api.ScrapeContext, config v1.Kuberne
return err
}

if err := SaveResults(cc, results); err != nil {
if err := db.SavePartialResults(cc, results); err != nil {
return fmt.Errorf("failed to save results: %w", err)
}

Expand Down Expand Up @@ -303,7 +303,7 @@ func ConsumeKubernetesWatchResourcesJobFunc(sc api.ScrapeContext, config v1.Kube
return err
}

if err := SaveResults(cc, results); err != nil {
if err := db.SavePartialResults(cc, results); err != nil {
return fmt.Errorf("failed to save %d results: %w", len(results), err)
}

Expand Down
12 changes: 1 addition & 11 deletions scrapers/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func RunScraper(ctx api.ScrapeContext) (v1.ScrapeResults, error) {
return nil, fmt.Errorf("failed to run scraper %v: %w", ctx.ScrapeConfig().Name, scraperErr)
}

if err := SaveResults(ctx, results); err != nil {
if err := db.SaveResults(ctx, results); err != nil {
return nil, fmt.Errorf("failed to save results: %w", err)
}

Expand All @@ -41,16 +41,6 @@ func RunScraper(ctx api.ScrapeContext) (v1.ScrapeResults, error) {
return results, nil
}

func SaveResults(ctx api.ScrapeContext, results v1.ScrapeResults) error {
dbErr := db.SaveResults(ctx, results)
if dbErr != nil {
//FIXME cache results to save to db later
return dbErr
}

return nil
}

func UpdateStaleConfigItems(ctx api.ScrapeContext, results v1.ScrapeResults) error {
persistedID := ctx.ScrapeConfig().GetPersistedID()
if persistedID != nil {
Expand Down

0 comments on commit b61f406

Please sign in to comment.