From 7fd0755119e8ee900b9e985ccc8e815695c2c8a5 Mon Sep 17 00:00:00 2001 From: Yash Mehrotra Date: Tue, 5 Dec 2023 16:00:26 +0530 Subject: [PATCH] feat: change retention (#324) * feat: change retention * chore: clean up * fix: config retention tests and flow * chore: change config retention to a job --- api/v1/types.go | 11 +++ api/v1/zz_generated.deepcopy.go | 36 +++++++ ...configs.flanksource.com_scrapeconfigs.yaml | 14 +++ config/schemas/scrape_config.schema.json | 2 +- jobs/jobs.go | 1 + jobs/retention.go | 48 ++++++++++ scrapers/retention.go | 66 +++++++++++++ scrapers/run.go | 8 +- scrapers/runscrapers_test.go | 96 +++++++++++++++++++ 9 files changed, 277 insertions(+), 5 deletions(-) create mode 100644 jobs/retention.go create mode 100644 scrapers/retention.go diff --git a/api/v1/types.go b/api/v1/types.go index 1b593d74..758c52b9 100644 --- a/api/v1/types.go +++ b/api/v1/types.go @@ -22,6 +22,16 @@ var AllScraperConfigs = map[string]any{ "trivy": Trivy{}, } +type ChangeRetentionSpec struct { + Name string `json:"name,omitempty"` + Age string `json:"age,omitempty"` + Count int `json:"count,omitempty"` +} + +type RetentionSpec struct { + Changes []ChangeRetentionSpec `json:"changes,omitempty"` +} + // ScraperSpec defines the desired state of Config scraper type ScraperSpec struct { LogLevel string `json:"logLevel,omitempty"` @@ -35,6 +45,7 @@ type ScraperSpec struct { Azure []Azure `json:"azure,omitempty" yaml:"azure,omitempty"` SQL []SQL `json:"sql,omitempty" yaml:"sql,omitempty"` Trivy []Trivy `json:"trivy,omitempty" yaml:"trivy,omitempty"` + Retention RetentionSpec `json:"retention,omitempty"` // Full flag when set will try to extract out changes from the scraped config. 
Full bool `json:"full,omitempty"` diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index b3a8c790..d15ea09b 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -176,6 +176,21 @@ func (in *BaseScraper) DeepCopy() *BaseScraper { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ChangeRetentionSpec) DeepCopyInto(out *ChangeRetentionSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ChangeRetentionSpec. +func (in *ChangeRetentionSpec) DeepCopy() *ChangeRetentionSpec { + if in == nil { + return nil + } + out := new(ChangeRetentionSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *CloudTrail) DeepCopyInto(out *CloudTrail) { *out = *in @@ -710,6 +725,26 @@ func (in *ResourceSelector) DeepCopy() *ResourceSelector { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RetentionSpec) DeepCopyInto(out *RetentionSpec) { + *out = *in + if in.Changes != nil { + in, out := &in.Changes, &out.Changes + *out = make([]ChangeRetentionSpec, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RetentionSpec. +func (in *RetentionSpec) DeepCopy() *RetentionSpec { + if in == nil { + return nil + } + out := new(RetentionSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *RunNowResponse) DeepCopyInto(out *RunNowResponse) { *out = *in @@ -887,6 +922,7 @@ func (in *ScraperSpec) DeepCopyInto(out *ScraperSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + in.Retention.DeepCopyInto(&out.Retention) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ScraperSpec. diff --git a/chart/crds/configs.flanksource.com_scrapeconfigs.yaml b/chart/crds/configs.flanksource.com_scrapeconfigs.yaml index dbaa60e7..7740ccd5 100644 --- a/chart/crds/configs.flanksource.com_scrapeconfigs.yaml +++ b/chart/crds/configs.flanksource.com_scrapeconfigs.yaml @@ -1167,6 +1167,20 @@ spec: type: array logLevel: type: string + retention: + properties: + changes: + items: + properties: + age: + type: string + count: + type: integer + name: + type: string + type: object + type: array + type: object schedule: type: string sql: diff --git a/config/schemas/scrape_config.schema.json b/config/schemas/scrape_config.schema.json index 82f238ab..5643a016 100644 --- a/config/schemas/scrape_config.schema.json +++ b/config/schemas/scrape_config.schema.json @@ -1 +1 @@ 
-{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ScrapeConfig","definitions":{"AWS":{"required":["BaseScraper","AWSConnection"],"properties":{"BaseScraper":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/BaseScraper"},"AWSConnection":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/AWSConnection"},"patch_states":{"type":"boolean"},"patch_details":{"type":"boolean"},"inventory":{"type":"boolean"},"compliance":{"type":"boolean"},"cloudtrail":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/CloudTrail"},"trusted_advisor_check":{"type":"boolean"},"include":{"items":{"type":"string"},"type":"array"},"exclude":{"items":{"type":"string"},"type":"array"},"cost_reporting":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/CostReporting"}},"additionalProperties":false,"type":"object"},"AWSConnection":{"required":["region"],"properties":{"connection":{"type":"string"},"accessKey":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/EnvVar"},"secretKey":{"$ref":"#/definitions/EnvVar"},"region":{"items":{"type":"string"},"type":"array"},"endpoint":{"type":"string"},"skipTLSVerify":{"type":"boolean"},"assumeRole":{"type":"string"}},"additionalProperties":false,"type":"object"},"Authentication":{"required":["username","password"],"properties":{"username":{"$ref":"#/definitions/EnvVar"},"password":{"$ref":"#/definitions/EnvVar"}},"additionalProperties":false,"type":"object"},"Azure":{"required":["BaseScraper","subscriptionID","organisation","tenantID"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"connection":{"type":"string"},"subscriptionID":{"type":"string"},"organisation":{"type":"string"},"clientID":{"$ref":"#/definitions/EnvVar"},"clientSecret":{"$ref":"#/definitions/EnvVar"},"tenantID":{"type":"string"}},"additionalProperties":false,"type":"object"},"AzureDevops":{"required":["BaseScraper","projects","pipe
lines"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"connection":{"type":"string"},"organization":{"type":"string"},"personalAccessToken":{"$ref":"#/definitions/EnvVar"},"projects":{"items":{"type":"string"},"type":"array"},"pipelines":{"items":{"type":"string"},"type":"array"}},"additionalProperties":false,"type":"object"},"BaseScraper":{"properties":{"id":{"type":"string"},"name":{"type":"string"},"items":{"type":"string"},"type":{"type":"string"},"transform":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Transform"},"format":{"type":"string"},"timestampFormat":{"type":"string"},"createFields":{"items":{"type":"string"},"type":"array"},"deleteFields":{"items":{"type":"string"},"type":"array"},"tags":{"patternProperties":{".*":{"type":"string"}},"type":"object"}},"additionalProperties":false,"type":"object"},"CloudTrail":{"properties":{"exclude":{"items":{"type":"string"},"type":"array"},"max_age":{"type":"string"}},"additionalProperties":false,"type":"object"},"ConfigMapKeySelector":{"required":["key"],"properties":{"name":{"type":"string"},"key":{"type":"string"}},"additionalProperties":false,"type":"object"},"Connection":{"required":["connection"],"properties":{"connection":{"type":"string"},"auth":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Authentication"}},"additionalProperties":false,"type":"object"},"CostReporting":{"properties":{"s3_bucket_path":{"type":"string"},"table":{"type":"string"},"database":{"type":"string"},"region":{"type":"string"}},"additionalProperties":false,"type":"object"},"EnvVar":{"properties":{"name":{"type":"string"},"value":{"type":"string"},"valueFrom":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/EnvVarSource"}},"additionalProperties":false,"type":"object"},"EnvVarSource":{"properties":{"configMapKeyRef":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ConfigMapKeySelector"},"secretKeyRef":{"$schema"
:"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/SecretKeySelector"}},"additionalProperties":false,"type":"object"},"FieldsV1":{"properties":{},"additionalProperties":false,"type":"object"},"File":{"required":["BaseScraper"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"url":{"type":"string"},"paths":{"items":{"type":"string"},"type":"array"},"ignore":{"items":{"type":"string"},"type":"array"},"format":{"type":"string"},"icon":{"type":"string"},"connection":{"type":"string"}},"additionalProperties":false,"type":"object"},"Filter":{"properties":{"jsonpath":{"type":"string"}},"additionalProperties":false,"type":"object"},"GitHubActions":{"required":["BaseScraper","owner","repository","personalAccessToken","workflows"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"owner":{"type":"string"},"repository":{"type":"string"},"personalAccessToken":{"$ref":"#/definitions/EnvVar"},"connection":{"type":"string"},"workflows":{"items":{"type":"string"},"type":"array"}},"additionalProperties":false,"type":"object"},"Kubernetes":{"required":["BaseScraper"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"clusterName":{"type":"string"},"namespace":{"type":"string"},"useCache":{"type":"boolean"},"allowIncomplete":{"type":"boolean"},"scope":{"type":"string"},"since":{"type":"string"},"selector":{"type":"string"},"fieldSelector":{"type":"string"},"maxInflight":{"type":"integer"},"exclusions":{"items":{"type":"string"},"type":"array"},"kubeconfig":{"$ref":"#/definitions/EnvVar"},"event":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/KubernetesEvent"},"relationships":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/KubernetesRelationship"},"type":"array"}},"additionalProperties":false,"type":"object"},"KubernetesEvent":{"properties":{"exclusions":{"items":{"type":"string"},"type":"array"},"severityKeywords":{"$schema":"http://json-schema.org/draft-04/sche
ma#","$ref":"#/definitions/SeverityKeywords"}},"additionalProperties":false,"type":"object"},"KubernetesFile":{"required":["BaseScraper","selector"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"selector":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ResourceSelector"},"container":{"type":"string"},"files":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/PodFile"},"type":"array"}},"additionalProperties":false,"type":"object"},"KubernetesRelationship":{"required":["kind"],"properties":{"kind":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/KubernetesRelationshipLookup"},"name":{"$ref":"#/definitions/KubernetesRelationshipLookup"},"namespace":{"$ref":"#/definitions/KubernetesRelationshipLookup"}},"additionalProperties":false,"type":"object"},"KubernetesRelationshipLookup":{"properties":{"expr":{"type":"string"},"value":{"type":"string"},"label":{"type":"string"}},"additionalProperties":false,"type":"object"},"ManagedFieldsEntry":{"properties":{"manager":{"type":"string"},"operation":{"type":"string"},"apiVersion":{"type":"string"},"time":{"$ref":"#/definitions/Time"},"fieldsType":{"type":"string"},"fieldsV1":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/FieldsV1"},"subresource":{"type":"string"}},"additionalProperties":false,"type":"object"},"Mask":{"properties":{"selector":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/MaskSelector"},"jsonpath":{"type":"string"},"value":{"type":"string"}},"additionalProperties":false,"type":"object"},"MaskSelector":{"properties":{"type":{"type":"string"}},"additionalProperties":false,"type":"object"},"ObjectMeta":{"properties":{"name":{"type":"string"},"generateName":{"type":"string"},"namespace":{"type":"string"},"selfLink":{"type":"string"},"uid":{"type":"string"},"resourceVersion":{"type":"string"},"generation":{"type":"integer"},"creationTimestamp":{"$schema":"http
://json-schema.org/draft-04/schema#","$ref":"#/definitions/Time"},"deletionTimestamp":{"$ref":"#/definitions/Time"},"deletionGracePeriodSeconds":{"type":"integer"},"labels":{"patternProperties":{".*":{"type":"string"}},"type":"object"},"annotations":{"patternProperties":{".*":{"type":"string"}},"type":"object"},"ownerReferences":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/OwnerReference"},"type":"array"},"finalizers":{"items":{"type":"string"},"type":"array"},"managedFields":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ManagedFieldsEntry"},"type":"array"}},"additionalProperties":false,"type":"object"},"OwnerReference":{"required":["apiVersion","kind","name","uid"],"properties":{"apiVersion":{"type":"string"},"kind":{"type":"string"},"name":{"type":"string"},"uid":{"type":"string"},"controller":{"type":"boolean"},"blockOwnerDeletion":{"type":"boolean"}},"additionalProperties":false,"type":"object"},"PodFile":{"properties":{"path":{"items":{"type":"string"},"type":"array"},"format":{"type":"string"}},"additionalProperties":false,"type":"object"},"ResourceSelector":{"properties":{"namespace":{"type":"string"},"kind":{"type":"string"},"name":{"type":"string"},"labelSelector":{"type":"string"},"fieldSelector":{"type":"string"}},"additionalProperties":false,"type":"object"},"SQL":{"required":["BaseScraper","Connection","query"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"Connection":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Connection"},"driver":{"type":"string"},"query":{"type":"string"}},"additionalProperties":false,"type":"object"},"ScrapeConfig":{"required":["TypeMeta"],"properties":{"TypeMeta":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/TypeMeta"},"metadata":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ObjectMeta"},"spec":{"$schema":"http://json-schema.org/draft-04/schema#","
$ref":"#/definitions/ScraperSpec"},"status":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ScrapeConfigStatus"}},"additionalProperties":false,"type":"object"},"ScrapeConfigStatus":{"properties":{"observedGeneration":{"type":"integer"}},"additionalProperties":false,"type":"object"},"ScraperSpec":{"properties":{"logLevel":{"type":"string"},"schedule":{"type":"string"},"aws":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/AWS"},"type":"array"},"file":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/File"},"type":"array"},"kubernetes":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Kubernetes"},"type":"array"},"kubernetesFile":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/KubernetesFile"},"type":"array"},"azureDevops":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/AzureDevops"},"type":"array"},"githubActions":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/GitHubActions"},"type":"array"},"azure":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Azure"},"type":"array"},"sql":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/SQL"},"type":"array"},"trivy":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Trivy"},"type":"array"},"full":{"type":"boolean"}},"additionalProperties":false,"type":"object"},"SecretKeySelector":{"required":["key"],"properties":{"name":{"type":"string"},"key":{"type":"string"}},"additionalProperties":false,"type":"object"},"SeverityKeywords":{"properties":{"warn":{"items":{"type":"string"},"type":"array"},"error":{"items":{"type":"string"},"type":"array"}},"additionalProperties":false,"type":"object"},"Time":{"properties":{},"additionalProperties":false,"type":"object"},"Transform":{"properties":{"gotemplate
":{"type":"string"},"jsonpath":{"type":"string"},"expr":{"type":"string"},"javascript":{"type":"string"},"include":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Filter"},"type":"array"},"exclude":{"items":{"$ref":"#/definitions/Filter"},"type":"array"},"mask":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Mask"},"type":"array"}},"additionalProperties":false,"type":"object"},"Trivy":{"required":["BaseScraper"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"version":{"type":"string"},"compliance":{"items":{"type":"string"},"type":"array"},"ignoredLicenses":{"items":{"type":"string"},"type":"array"},"ignoreUnfixed":{"type":"boolean"},"licenseFull":{"type":"boolean"},"severity":{"items":{"type":"string"},"type":"array"},"vulnType":{"items":{"type":"string"},"type":"array"},"scanners":{"items":{"type":"string"},"type":"array"},"timeout":{"type":"string"},"kubernetes":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/TrivyK8sOptions"}},"additionalProperties":false,"type":"object"},"TrivyK8sOptions":{"properties":{"components":{"items":{"type":"string"},"type":"array"},"context":{"type":"string"},"kubeconfig":{"type":"string"},"namespace":{"type":"string"}},"additionalProperties":false,"type":"object"},"TypeMeta":{"properties":{"kind":{"type":"string"},"apiVersion":{"type":"string"}},"additionalProperties":false,"type":"object"}}} \ No newline at end of file 
+{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ScrapeConfig","definitions":{"AWS":{"required":["BaseScraper","AWSConnection"],"properties":{"BaseScraper":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/BaseScraper"},"AWSConnection":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/AWSConnection"},"patch_states":{"type":"boolean"},"patch_details":{"type":"boolean"},"inventory":{"type":"boolean"},"compliance":{"type":"boolean"},"cloudtrail":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/CloudTrail"},"trusted_advisor_check":{"type":"boolean"},"include":{"items":{"type":"string"},"type":"array"},"exclude":{"items":{"type":"string"},"type":"array"},"cost_reporting":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/CostReporting"}},"additionalProperties":false,"type":"object"},"AWSConnection":{"required":["region"],"properties":{"connection":{"type":"string"},"accessKey":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/EnvVar"},"secretKey":{"$ref":"#/definitions/EnvVar"},"region":{"items":{"type":"string"},"type":"array"},"endpoint":{"type":"string"},"skipTLSVerify":{"type":"boolean"},"assumeRole":{"type":"string"}},"additionalProperties":false,"type":"object"},"Authentication":{"required":["username","password"],"properties":{"username":{"$ref":"#/definitions/EnvVar"},"password":{"$ref":"#/definitions/EnvVar"}},"additionalProperties":false,"type":"object"},"Azure":{"required":["BaseScraper","subscriptionID","organisation","tenantID"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"connection":{"type":"string"},"subscriptionID":{"type":"string"},"organisation":{"type":"string"},"clientID":{"$ref":"#/definitions/EnvVar"},"clientSecret":{"$ref":"#/definitions/EnvVar"},"tenantID":{"type":"string"}},"additionalProperties":false,"type":"object"},"AzureDevops":{"required":["BaseScraper","projects","pipe
lines"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"connection":{"type":"string"},"organization":{"type":"string"},"personalAccessToken":{"$ref":"#/definitions/EnvVar"},"projects":{"items":{"type":"string"},"type":"array"},"pipelines":{"items":{"type":"string"},"type":"array"}},"additionalProperties":false,"type":"object"},"BaseScraper":{"properties":{"id":{"type":"string"},"name":{"type":"string"},"items":{"type":"string"},"type":{"type":"string"},"transform":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Transform"},"format":{"type":"string"},"timestampFormat":{"type":"string"},"createFields":{"items":{"type":"string"},"type":"array"},"deleteFields":{"items":{"type":"string"},"type":"array"},"tags":{"patternProperties":{".*":{"type":"string"}},"type":"object"}},"additionalProperties":false,"type":"object"},"ChangeRetentionSpec":{"properties":{"name":{"type":"string"},"age":{"type":"string"},"count":{"type":"integer"}},"additionalProperties":false,"type":"object"},"CloudTrail":{"properties":{"exclude":{"items":{"type":"string"},"type":"array"},"max_age":{"type":"string"}},"additionalProperties":false,"type":"object"},"ConfigMapKeySelector":{"required":["key"],"properties":{"name":{"type":"string"},"key":{"type":"string"}},"additionalProperties":false,"type":"object"},"Connection":{"required":["connection"],"properties":{"connection":{"type":"string"},"auth":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Authentication"}},"additionalProperties":false,"type":"object"},"CostReporting":{"properties":{"s3_bucket_path":{"type":"string"},"table":{"type":"string"},"database":{"type":"string"},"region":{"type":"string"}},"additionalProperties":false,"type":"object"},"EnvVar":{"properties":{"name":{"type":"string"},"value":{"type":"string"},"valueFrom":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/EnvVarSource"}},"additionalProperties":false,"type":"object"},"EnvVarSour
ce":{"properties":{"configMapKeyRef":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ConfigMapKeySelector"},"secretKeyRef":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/SecretKeySelector"}},"additionalProperties":false,"type":"object"},"FieldsV1":{"properties":{},"additionalProperties":false,"type":"object"},"File":{"required":["BaseScraper"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"url":{"type":"string"},"paths":{"items":{"type":"string"},"type":"array"},"ignore":{"items":{"type":"string"},"type":"array"},"format":{"type":"string"},"icon":{"type":"string"},"connection":{"type":"string"}},"additionalProperties":false,"type":"object"},"Filter":{"properties":{"jsonpath":{"type":"string"}},"additionalProperties":false,"type":"object"},"GitHubActions":{"required":["BaseScraper","owner","repository","personalAccessToken","workflows"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"owner":{"type":"string"},"repository":{"type":"string"},"personalAccessToken":{"$ref":"#/definitions/EnvVar"},"connection":{"type":"string"},"workflows":{"items":{"type":"string"},"type":"array"}},"additionalProperties":false,"type":"object"},"Kubernetes":{"required":["BaseScraper"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"clusterName":{"type":"string"},"namespace":{"type":"string"},"useCache":{"type":"boolean"},"allowIncomplete":{"type":"boolean"},"scope":{"type":"string"},"since":{"type":"string"},"selector":{"type":"string"},"fieldSelector":{"type":"string"},"maxInflight":{"type":"integer"},"exclusions":{"items":{"type":"string"},"type":"array"},"kubeconfig":{"$ref":"#/definitions/EnvVar"},"event":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/KubernetesEvent"},"relationships":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/KubernetesRelationship"},"type":"array"}},"additionalProperties":false,"type":"objec
t"},"KubernetesEvent":{"properties":{"exclusions":{"items":{"type":"string"},"type":"array"},"severityKeywords":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/SeverityKeywords"}},"additionalProperties":false,"type":"object"},"KubernetesFile":{"required":["BaseScraper","selector"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"selector":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ResourceSelector"},"container":{"type":"string"},"files":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/PodFile"},"type":"array"}},"additionalProperties":false,"type":"object"},"KubernetesRelationship":{"required":["kind"],"properties":{"kind":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/KubernetesRelationshipLookup"},"name":{"$ref":"#/definitions/KubernetesRelationshipLookup"},"namespace":{"$ref":"#/definitions/KubernetesRelationshipLookup"}},"additionalProperties":false,"type":"object"},"KubernetesRelationshipLookup":{"properties":{"expr":{"type":"string"},"value":{"type":"string"},"label":{"type":"string"}},"additionalProperties":false,"type":"object"},"ManagedFieldsEntry":{"properties":{"manager":{"type":"string"},"operation":{"type":"string"},"apiVersion":{"type":"string"},"time":{"$ref":"#/definitions/Time"},"fieldsType":{"type":"string"},"fieldsV1":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/FieldsV1"},"subresource":{"type":"string"}},"additionalProperties":false,"type":"object"},"Mask":{"properties":{"selector":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/MaskSelector"},"jsonpath":{"type":"string"},"value":{"type":"string"}},"additionalProperties":false,"type":"object"},"MaskSelector":{"properties":{"type":{"type":"string"}},"additionalProperties":false,"type":"object"},"ObjectMeta":{"properties":{"name":{"type":"string"},"generateName":{"type":"string"},"namespace":{"type":"string
"},"selfLink":{"type":"string"},"uid":{"type":"string"},"resourceVersion":{"type":"string"},"generation":{"type":"integer"},"creationTimestamp":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Time"},"deletionTimestamp":{"$ref":"#/definitions/Time"},"deletionGracePeriodSeconds":{"type":"integer"},"labels":{"patternProperties":{".*":{"type":"string"}},"type":"object"},"annotations":{"patternProperties":{".*":{"type":"string"}},"type":"object"},"ownerReferences":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/OwnerReference"},"type":"array"},"finalizers":{"items":{"type":"string"},"type":"array"},"managedFields":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ManagedFieldsEntry"},"type":"array"}},"additionalProperties":false,"type":"object"},"OwnerReference":{"required":["apiVersion","kind","name","uid"],"properties":{"apiVersion":{"type":"string"},"kind":{"type":"string"},"name":{"type":"string"},"uid":{"type":"string"},"controller":{"type":"boolean"},"blockOwnerDeletion":{"type":"boolean"}},"additionalProperties":false,"type":"object"},"PodFile":{"properties":{"path":{"items":{"type":"string"},"type":"array"},"format":{"type":"string"}},"additionalProperties":false,"type":"object"},"ResourceSelector":{"properties":{"namespace":{"type":"string"},"kind":{"type":"string"},"name":{"type":"string"},"labelSelector":{"type":"string"},"fieldSelector":{"type":"string"}},"additionalProperties":false,"type":"object"},"RetentionSpec":{"properties":{"changes":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ChangeRetentionSpec"},"type":"array"}},"additionalProperties":false,"type":"object"},"SQL":{"required":["BaseScraper","Connection","query"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"Connection":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Connection"},"driver":{"type":"string"},"query":{"type
":"string"}},"additionalProperties":false,"type":"object"},"ScrapeConfig":{"required":["TypeMeta"],"properties":{"TypeMeta":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/TypeMeta"},"metadata":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ObjectMeta"},"spec":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ScraperSpec"},"status":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/ScrapeConfigStatus"}},"additionalProperties":false,"type":"object"},"ScrapeConfigStatus":{"properties":{"observedGeneration":{"type":"integer"}},"additionalProperties":false,"type":"object"},"ScraperSpec":{"properties":{"logLevel":{"type":"string"},"schedule":{"type":"string"},"aws":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/AWS"},"type":"array"},"file":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/File"},"type":"array"},"kubernetes":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Kubernetes"},"type":"array"},"kubernetesFile":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/KubernetesFile"},"type":"array"},"azureDevops":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/AzureDevops"},"type":"array"},"githubActions":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/GitHubActions"},"type":"array"},"azure":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Azure"},"type":"array"},"sql":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/SQL"},"type":"array"},"trivy":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Trivy"},"type":"array"},"retention":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/RetentionSpec"},"full":{"type":"boolean"}},"additionalPr
operties":false,"type":"object"},"SecretKeySelector":{"required":["key"],"properties":{"name":{"type":"string"},"key":{"type":"string"}},"additionalProperties":false,"type":"object"},"SeverityKeywords":{"properties":{"warn":{"items":{"type":"string"},"type":"array"},"error":{"items":{"type":"string"},"type":"array"}},"additionalProperties":false,"type":"object"},"Time":{"properties":{},"additionalProperties":false,"type":"object"},"Transform":{"properties":{"gotemplate":{"type":"string"},"jsonpath":{"type":"string"},"expr":{"type":"string"},"javascript":{"type":"string"},"include":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Filter"},"type":"array"},"exclude":{"items":{"$ref":"#/definitions/Filter"},"type":"array"},"mask":{"items":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/Mask"},"type":"array"}},"additionalProperties":false,"type":"object"},"Trivy":{"required":["BaseScraper"],"properties":{"BaseScraper":{"$ref":"#/definitions/BaseScraper"},"version":{"type":"string"},"compliance":{"items":{"type":"string"},"type":"array"},"ignoredLicenses":{"items":{"type":"string"},"type":"array"},"ignoreUnfixed":{"type":"boolean"},"licenseFull":{"type":"boolean"},"severity":{"items":{"type":"string"},"type":"array"},"vulnType":{"items":{"type":"string"},"type":"array"},"scanners":{"items":{"type":"string"},"type":"array"},"timeout":{"type":"string"},"kubernetes":{"$schema":"http://json-schema.org/draft-04/schema#","$ref":"#/definitions/TrivyK8sOptions"}},"additionalProperties":false,"type":"object"},"TrivyK8sOptions":{"properties":{"components":{"items":{"type":"string"},"type":"array"},"context":{"type":"string"},"kubeconfig":{"type":"string"},"namespace":{"type":"string"}},"additionalProperties":false,"type":"object"},"TypeMeta":{"properties":{"kind":{"type":"string"},"apiVersion":{"type":"string"}},"additionalProperties":false,"type":"object"}}} \ No newline at end of file diff --git a/jobs/jobs.go 
b/jobs/jobs.go index 36ad071c..aae6e134 100644 --- a/jobs/jobs.go +++ b/jobs/jobs.go @@ -27,6 +27,7 @@ func ScheduleJobs() { scheduleFunc("@every 24h", DeleteOldConfigChanges) scheduleFunc("@every 24h", DeleteOldConfigAnalysis) scheduleFunc("@every 24h", CleanupConfigItems) + scheduleFunc("@every 1h", ProcessChangeRetentionRules) if api.UpstreamConfig.Valid() { pullJob := &UpstreamPullJob{} diff --git a/jobs/retention.go b/jobs/retention.go new file mode 100644 index 00000000..8593a3db --- /dev/null +++ b/jobs/retention.go @@ -0,0 +1,48 @@ +package jobs + +import ( + gocontext "context" + "encoding/json" + + "github.com/flanksource/commons/logger" + "github.com/flanksource/config-db/api/v1" + "github.com/flanksource/config-db/db" + "github.com/flanksource/config-db/scrapers" + "github.com/flanksource/duty/context" + "github.com/flanksource/duty/models" +) + +func ProcessChangeRetentionRules() { + ctx := context.NewContext(gocontext.Background()).WithDB(db.DefaultDB(), db.Pool) + jobHistory := models.NewJobHistory("ProcessChangeRetentionRules", "", "").Start() + _ = db.PersistJobHistory(jobHistory) + defer func() { + _ = db.PersistJobHistory(jobHistory.End()) + }() + + var activeScrapers []models.ConfigScraper + if err := ctx.DB().Where("deleted_at IS NULL").Find(&activeScrapers).Error; err != nil { + logger.Errorf("Error querying config scrapers from db: %v", err) + jobHistory.AddError(err.Error()) + return + } + + for _, s := range activeScrapers { + var spec v1.ScraperSpec + if err := json.Unmarshal([]byte(s.Spec), &spec); err != nil { + logger.Errorf("Error unmarshaling config scraper[%s] into json: %v", s.ID, err) + jobHistory.AddError(err.Error()) + continue + } + + for _, changeSpec := range spec.Retention.Changes { + err := scrapers.ProcessChangeRetention(ctx, s.ID, changeSpec) + if err != nil { + logger.Errorf("Error processing change retention for scraper[%s] config analysis: %v", s.ID, err) + jobHistory.AddError(err.Error()) + } else { + 
jobHistory.IncrSuccess() + } + } + } +} diff --git a/scrapers/retention.go b/scrapers/retention.go new file mode 100644 index 00000000..6aa57d07 --- /dev/null +++ b/scrapers/retention.go @@ -0,0 +1,66 @@ +package scrapers + +import ( + "database/sql" + "fmt" + "strings" + + "github.com/flanksource/commons/duration" + "github.com/flanksource/commons/logger" + v1 "github.com/flanksource/config-db/api/v1" + "github.com/flanksource/duty/context" + "github.com/google/uuid" +) + +func ProcessChangeRetention(ctx context.Context, scraperID uuid.UUID, spec v1.ChangeRetentionSpec) error { + var whereClauses []string + + var ageMinutes int + if spec.Age != "" { + age, err := duration.ParseDuration(spec.Age) + if err != nil { + return fmt.Errorf("error parsing age %s as duration: %w", spec.Age, err) + } + ageMinutes = int(age.Minutes()) + + whereClauses = append(whereClauses, `((now()- created_at) > interval '1 minute' * @ageMinutes)`) + } + + if spec.Count > 0 { + whereClauses = append(whereClauses, `seq > @count`) + } + + if len(whereClauses) == 0 { + return fmt.Errorf("both age and count cannot be empty") + } + + query := fmt.Sprintf(` + WITH latest_config_changes AS ( + SELECT id, change_type, created_at, ROW_NUMBER() OVER(ORDER BY created_at DESC) AS seq + FROM config_changes + WHERE + change_type = @changeType AND + config_id IN (SELECT id FROM config_items WHERE scraper_id = @scraperID) + ) + DELETE FROM config_changes + WHERE id IN ( + SELECT id from latest_config_changes + WHERE %s + ) + `, strings.Join(whereClauses, " OR ")) + + result := ctx.DB().Exec(query, + sql.Named("changeType", spec.Name), + sql.Named("scraperID", scraperID), + sql.Named("ageMinutes", ageMinutes), + sql.Named("count", spec.Count), + ) + if err := result.Error; err != nil { + return fmt.Errorf("error retaining config changes: %w", err) + } + + if result.RowsAffected > 0 { + logger.Infof("Deleted %d config_changes as per ChangeRetentionSpec[%s]", result.RowsAffected, spec.Name) + } + return nil 
+} diff --git a/scrapers/run.go b/scrapers/run.go index 96086365..e27b733c 100644 --- a/scrapers/run.go +++ b/scrapers/run.go @@ -20,10 +20,10 @@ func RunScraper(ctx api.ScrapeContext) (v1.ScrapeResults, error) { return nil, fmt.Errorf("failed to update db: %w", dbErr) } - // If error in any of the scrape results, don't delete old items - if len(results) > 0 && !v1.ScrapeResults(results).HasErr() { - persistedID := ctx.ScrapeConfig().GetPersistedID() - if persistedID != nil { + persistedID := ctx.ScrapeConfig().GetPersistedID() + if persistedID != nil { + // If error in any of the scrape results, don't delete old items + if len(results) > 0 && !v1.ScrapeResults(results).HasErr() { if err := DeleteStaleConfigItems(ctx.DutyContext(), *persistedID); err != nil { return nil, fmt.Errorf("error deleting stale config items: %w", err) } diff --git a/scrapers/runscrapers_test.go b/scrapers/runscrapers_test.go index 36a74d01..af28abf6 100644 --- a/scrapers/runscrapers_test.go +++ b/scrapers/runscrapers_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "os" + "time" "github.com/flanksource/commons/logger" "github.com/flanksource/config-db/api" @@ -12,7 +13,10 @@ import ( "github.com/flanksource/config-db/db" "github.com/flanksource/config-db/db/models" "github.com/flanksource/duty" + "github.com/flanksource/duty/context" + dutymodels "github.com/flanksource/duty/models" "github.com/flanksource/duty/types" + "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -218,6 +222,98 @@ var _ = Describe("Scrapers test", Ordered, func() { Expect(configItem, storedConfigItem) }) + + It("should retain config changes as per the spec", func() { + dummyScraper := dutymodels.ConfigScraper{ + Name: "Test", + Spec: `{"foo":"bar"}`, + Source: dutymodels.SourceConfigFile, + } + err := db.DefaultDB().Create(&dummyScraper).Error + Expect(err).To(BeNil()) + + configItemID := uuid.New().String() + dummyCI := models.ConfigItem{ + ID: configItemID, + ConfigClass: "Test", + ScraperID: &dummyScraper.ID, + } + err = db.DefaultDB().Create(&dummyCI).Error + Expect(err).To(BeNil()) + + twoDaysAgo := time.Now().Add(-2 * 24 * time.Hour) + fiveDaysAgo := time.Now().Add(-5 * 24 * time.Hour) + tenDaysAgo := time.Now().Add(-10 * 24 * time.Hour) + configChanges := []models.ConfigChange{ + {ConfigID: configItemID, ChangeType: "TestDiff", ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &twoDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &twoDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &twoDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &twoDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &fiveDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &fiveDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: 
configItemID, ChangeType: "TestDiff", CreatedAt: &fiveDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &fiveDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &fiveDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &fiveDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &tenDaysAgo, ExternalChangeId: uuid.New().String()}, + {ConfigID: configItemID, ChangeType: "TestDiff", CreatedAt: &tenDaysAgo, ExternalChangeId: uuid.New().String()}, + } + + err = db.DefaultDB().Table("config_changes").Create(&configChanges).Error + Expect(err).To(BeNil()) + + var currentCount int + err = db.DefaultDB(). + Raw(`SELECT COUNT(*) FROM config_changes WHERE change_type = ? AND config_id = ?`, "TestDiff", configItemID). + Scan(&currentCount). + Error + Expect(err).To(BeNil()) + Expect(currentCount).To(Equal(len(configChanges))) + + ctx := context.NewContext(gocontext.Background()).WithDB(db.DefaultDB(), db.Pool) + + // Everything older than 8 days should be removed + err = ProcessChangeRetention(ctx, dummyScraper.ID, v1.ChangeRetentionSpec{Name: "TestDiff", Age: "8d"}) + Expect(err).To(BeNil()) + var count1 int + err = db.DefaultDB(). + Raw(`SELECT COUNT(*) FROM config_changes WHERE change_type = ? AND config_id = ?`, "TestDiff", configItemID). + Scan(&count1). + Error + Expect(err).To(BeNil()) + Expect(count1).To(Equal(15)) + + // Only keep latest 12 config changes + err = ProcessChangeRetention(ctx, dummyScraper.ID, v1.ChangeRetentionSpec{Name: "TestDiff", Count: 12}) + Expect(err).To(BeNil()) + var count2 int + err = db.DefaultDB(). + Raw(`SELECT COUNT(*) FROM config_changes WHERE change_type = ? AND config_id = ?`, "TestDiff", configItemID). + Scan(&count2).
+ Error + Expect(err).To(BeNil()) + Expect(count2).To(Equal(12)) + + // Keep config changes which are newer than 3 days and max count can be 10 + err = ProcessChangeRetention(ctx, dummyScraper.ID, v1.ChangeRetentionSpec{Name: "TestDiff", Age: "3d", Count: 10}) + Expect(err).To(BeNil()) + var count3 int + err = db.DefaultDB(). + Raw(`SELECT COUNT(*) FROM config_changes WHERE change_type = ? AND config_id = ?`, "TestDiff", configItemID). + Scan(&count3). + Error + Expect(err).To(BeNil()) + Expect(count3).To(Equal(9)) + + // No params in ChangeRetentionSpec should fail + err = ProcessChangeRetention(ctx, dummyScraper.ID, v1.ChangeRetentionSpec{Name: "TestDiff"}) + Expect(err).ToNot(BeNil()) + }) }) })