From 98bf2990ae3ea62778a4c4ca7f29605ca980bea1 Mon Sep 17 00:00:00 2001 From: "Sascha L. Teichmann" Date: Tue, 1 Aug 2023 21:38:55 +0200 Subject: [PATCH 1/6] Ignore advisories in checker. --- cmd/csaf_checker/config.go | 48 ++++++++++++++++++++++++++--------- cmd/csaf_checker/processor.go | 9 +++++++ docs/csaf_checker.md | 9 +++++++ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/cmd/csaf_checker/config.go b/cmd/csaf_checker/config.go index a2581d9e..aa456cc0 100644 --- a/cmd/csaf_checker/config.go +++ b/cmd/csaf_checker/config.go @@ -15,6 +15,7 @@ import ( "net/http" "time" + "github.com/csaf-poc/csaf_distribution/v2/internal/filter" "github.com/csaf-poc/csaf_distribution/v2/internal/models" "github.com/csaf-poc/csaf_distribution/v2/internal/options" ) @@ -29,16 +30,17 @@ const ( type config struct { Output string `short:"o" long:"output" description:"File name of the generated report" value-name:"REPORT-FILE" toml:"output"` //lint:ignore SA5008 We are using choice twice: json, html. 
- Format outputFormat `short:"f" long:"format" choice:"json" choice:"html" description:"Format of report" toml:"format"` - Insecure bool `long:"insecure" description:"Do not check TLS certificates from provider" toml:"insecure"` - ClientCert *string `long:"client-cert" description:"TLS client certificate file (PEM encoded data)" value-name:"CERT-FILE" toml:"client_cert"` - ClientKey *string `long:"client-key" description:"TLS client private key file (PEM encoded data)" value-name:"KEY-FILE" toml:"client_key"` - Version bool `long:"version" description:"Display version of the binary" toml:"-"` - Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"` - Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"` - Years *uint `long:"years" short:"y" description:"Number of years to look back from now" value-name:"YEARS" toml:"years"` - Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"` - ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"` + Format outputFormat `short:"f" long:"format" choice:"json" choice:"html" description:"Format of report" toml:"format"` + Insecure bool `long:"insecure" description:"Do not check TLS certificates from provider" toml:"insecure"` + ClientCert *string `long:"client-cert" description:"TLS client certificate file (PEM encoded data)" value-name:"CERT-FILE" toml:"client_cert"` + ClientKey *string `long:"client-key" description:"TLS client private key file (PEM encoded data)" value-name:"KEY-FILE" toml:"client_key"` + Version bool `long:"version" description:"Display version of the binary" toml:"-"` + Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"` + Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https 
operations per second (defaults to unlimited)" toml:"rate"` + Years *uint `long:"years" short:"y" description:"Number of years to look back from now" value-name:"YEARS" toml:"years"` + Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"` + IgnorePattern []string `long:"ignorepattern" short:"i" description:"Dont download files if there URLs match any of the given PATTERNs" value-name:"PATTERN" toml:"ignorepattern"` + ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"` RemoteValidator string `long:"validator" description:"URL to validate documents remotely" value-name:"URL" toml:"validator"` RemoteValidatorCache string `long:"validatorcache" description:"FILE to cache remote validations" value-name:"FILE" toml:"validator_cache"` @@ -46,8 +48,9 @@ type config struct { Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"` - clientCerts []tls.Certificate - ageAccept func(time.Time) bool + clientCerts []tls.Certificate + ageAccept func(time.Time) bool + ignorePattern filter.PatternMatcher } // configPaths are the potential file locations of the config file. @@ -104,8 +107,19 @@ func (cfg *config) protectedAccess() bool { return len(cfg.clientCerts) > 0 || len(cfg.ExtraHeader) > 0 } +// ignoreFile returns true if the given URL should not be downloaded. +func (cfg *config) ignoreURL(u string) bool { + return cfg.ignorePattern.Matches(u) +} + // prepare prepares internal state of a loaded configuration. func (cfg *config) prepare() error { + + // Pre-compile the regexes used to check if we need to ignore advisories. + if err := cfg.compileIgnorePatterns(); err != nil { + return err + } + // Load client certs. 
if err := cfg.prepareCertificates(); err != nil { return err @@ -114,6 +128,16 @@ func (cfg *config) prepare() error { return cfg.prepareTimeRangeFilter() } +// compileIgnorePatterns compiles the configure patterns to be ignored. +func (cfg *config) compileIgnorePatterns() error { + pm, err := filter.NewPatternMatcher(cfg.IgnorePattern) + if err != nil { + return err + } + cfg.ignorePattern = pm + return nil +} + // prepareCertificates loads the client side certificates used by the HTTP client. func (cfg *config) prepareCertificates() error { diff --git a/cmd/csaf_checker/processor.go b/cmd/csaf_checker/processor.go index bc1eeaa2..4da23ad7 100644 --- a/cmd/csaf_checker/processor.go +++ b/cmd/csaf_checker/processor.go @@ -642,6 +642,15 @@ func (p *processor) integrity( fp = makeAbs(fp) u := b.ResolveReference(fp).String() + + // Should this URL be ignored? + if p.cfg.ignoreURL(u) { + if p.cfg.Verbose { + log.Printf("Ignoring %q\n", u) + } + continue + } + if p.markChecked(u, mask) { continue } diff --git a/docs/csaf_checker.md b/docs/csaf_checker.md index c54f5050..4bc62ef3 100644 --- a/docs/csaf_checker.md +++ b/docs/csaf_checker.md @@ -17,6 +17,7 @@ Application Options: -r, --rate= The average upper limit of https operations per second (defaults to unlimited) -y, --years=YEARS Number of years to look back from now -t, --timerange=RANGE RANGE of time from which advisories to download + -i, --ignorepattern=PATTERN Dont download files if there URLs match any of the given PATTERNs -H, --header= One or more extra HTTP header fields --validator=URL URL to validate documents remotely --validatorcache=FILE FILE to cache remote validations @@ -98,6 +99,14 @@ It is only allowed to specify one off them. All interval boundaries are inclusive. +You can ignore certain advisories while checking by specifying a list +of regular expressions to match their URLs by using the `ignorepattern` option. +E.g. 
`-i='.*white.*' -i='.*red.*'` will ignore files whose URLs contain the sub strings **white** or **red**. +In the config file this has to be noted as: +``` +ignorepattern = [".*white.*", ".*red.*"] +``` + ### Remarks The `role` given in the `provider-metadata.json` is not From 85f9d02ac080fbdd17901e431c8a2515f267f786 Mon Sep 17 00:00:00 2001 From: "Sascha L. Teichmann" Date: Tue, 1 Aug 2023 21:45:30 +0200 Subject: [PATCH 2/6] Rename config.check to config.prepare to make symmerical to other tools. --- cmd/csaf_aggregator/config.go | 3 ++- cmd/csaf_aggregator/main.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/csaf_aggregator/config.go b/cmd/csaf_aggregator/config.go index da91c2f1..27f1336d 100644 --- a/cmd/csaf_aggregator/config.go +++ b/cmd/csaf_aggregator/config.go @@ -307,7 +307,8 @@ func (c *config) setDefaults() { } } -func (c *config) check() error { +// prepare prepares internal state of a loaded configuration. +func (c *config) prepare() error { if len(c.Providers) == 0 { return errors.New("no providers given in configuration") } diff --git a/cmd/csaf_aggregator/main.go b/cmd/csaf_aggregator/main.go index d6261927..3411edc8 100644 --- a/cmd/csaf_aggregator/main.go +++ b/cmd/csaf_aggregator/main.go @@ -56,7 +56,7 @@ func lock(lockFile *string, fn func() error) error { func main() { _, cfg, err := parseArgsConfig() options.ErrorCheck(err) - options.ErrorCheck(cfg.check()) + options.ErrorCheck(cfg.prepare()) p := processor{cfg: cfg} errCheck(lock(cfg.LockFile, p.process)) From 607bd0ebe161ccbe72210848ee4b01d7879b92f6 Mon Sep 17 00:00:00 2001 From: "Sascha L. Teichmann" Date: Tue, 1 Aug 2023 22:16:14 +0200 Subject: [PATCH 3/6] Add ignore patterns to aggreagtor. 
--- cmd/csaf_aggregator/config.go | 46 +++++++++++++++++++++++++++++++++++ cmd/csaf_aggregator/mirror.go | 8 ++++++ docs/csaf_aggregator.md | 1 + docs/examples/aggregator.toml | 1 + 4 files changed, 56 insertions(+) diff --git a/cmd/csaf_aggregator/config.go b/cmd/csaf_aggregator/config.go index 27f1336d..b4205a4c 100644 --- a/cmd/csaf_aggregator/config.go +++ b/cmd/csaf_aggregator/config.go @@ -20,6 +20,7 @@ import ( "github.com/ProtonMail/gopenpgp/v2/crypto" "github.com/csaf-poc/csaf_distribution/v2/csaf" + "github.com/csaf-poc/csaf_distribution/v2/internal/filter" "github.com/csaf-poc/csaf_distribution/v2/internal/options" "github.com/csaf-poc/csaf_distribution/v2/util" "golang.org/x/time/rate" @@ -48,6 +49,10 @@ type provider struct { // UpdateInterval is as the mandatory `update_interval` if this is a publisher. UpdateInterval *string `toml:"update_interval"` + + // IgnorePattern is a list of patterns of advisory URLs to be ignored. + IgnorePattern []string `toml:"ignorepattern"` + ignorePattern filter.PatternMatcher } type config struct { @@ -90,6 +95,10 @@ type config struct { // 'update_interval'. UpdateInterval *string `toml:"update_interval"` + // IgnorePattern is a list of patterns of advisory URLs to be ignored. + IgnorePattern []string `toml:"ignorepattern"` + ignorePattern filter.PatternMatcher + Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"` keyMu sync.Mutex @@ -128,6 +137,11 @@ func (c *config) tooOldForInterims() func(time.Time) bool { return func(t time.Time) bool { return t.Before(from) } } +// ignoreFile returns true if the given URL should not be downloaded. +func (p *provider) ignoreURL(u string, c *config) bool { + return p.ignorePattern.Matches(u) || c.ignorePattern.Matches(u) +} + // updateInterval returns the update interval of a publisher. 
func (p *provider) updateInterval(c *config) string { if p.UpdateInterval != nil { @@ -307,12 +321,44 @@ func (c *config) setDefaults() { } } +// compileIgnorePatterns compiles the configured patterns to be ignored. +func (p *provider) compileIgnorePatterns() error { + pm, err := filter.NewPatternMatcher(p.IgnorePattern) + if err != nil { + return err + } + p.ignorePattern = pm + return nil +} + +// compileIgnorePatterns compiles the configured patterns to be ignored. +func (c *config) compileIgnorePatterns() error { + // Compile the top level patterns. + pm, err := filter.NewPatternMatcher(c.IgnorePattern) + if err != nil { + return err + } + c.ignorePattern = pm + // Compile the patterns of the providers. + for _, p := range c.Providers { + if err := p.compileIgnorePatterns(); err != nil { + return fmt.Errorf("invalid ignore patterns for %q: %w", p.Name, err) + } + } + return nil +} + // prepare prepares internal state of a loaded configuration. func (c *config) prepare() error { + if len(c.Providers) == 0 { return errors.New("no providers given in configuration") } + if err := c.compileIgnorePatterns(); err != nil { + return err + } + if err := c.Aggregator.Validate(); err != nil { return err } diff --git a/cmd/csaf_aggregator/mirror.go b/cmd/csaf_aggregator/mirror.go index 0fd1de02..64ef18a4 100644 --- a/cmd/csaf_aggregator/mirror.go +++ b/cmd/csaf_aggregator/mirror.go @@ -500,6 +500,14 @@ func (w *worker) mirrorFiles(tlpLabel csaf.TLPLabel, files []csaf.AdvisoryFile) continue } + // Should we ignore this advisory? + if w.provider.ignoreURL(file.URL(), w.processor.cfg) { + if w.processor.cfg.Verbose { + log.Printf("Ignoring %s: %q\n", w.provider.Name, file.URL()) + } + continue + } + // Ignore not conforming filenames. 
filename := filepath.Base(u.Path) if !util.ConformingFileName(filename) { diff --git a/docs/csaf_aggregator.md b/docs/csaf_aggregator.md index edbe8e81..23577b70 100644 --- a/docs/csaf_aggregator.md +++ b/docs/csaf_aggregator.md @@ -94,6 +94,7 @@ lock_file // path to lockfile, to stop other instances if one is n interim_years // limiting the years for which interim documents are searched (default 0) verbose // print more diagnostic output, e.g. https requests (default false) allow_single_provider // debugging option (default false) +ignorepattern // patterns of advisory URLs to be ignored ``` Next we have two TOML _tables_: diff --git a/docs/examples/aggregator.toml b/docs/examples/aggregator.toml index 597144c7..80f9b8a9 100644 --- a/docs/examples/aggregator.toml +++ b/docs/examples/aggregator.toml @@ -48,3 +48,4 @@ insecure = true # If aggregator.category == "aggreator", set for an entry that should # be listed in addition: category = "lister" +# ignorepattern = [".*white.*", ".*red.*"] From b6e5af9b4991204216fb62c223dbbca4d111f01a Mon Sep 17 00:00:00 2001 From: JanHoefelmeyer Date: Wed, 2 Aug 2023 08:36:05 +0200 Subject: [PATCH 4/6] Clarified docs on where and how to use ignorepattern for aggregator --- docs/csaf_aggregator.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/csaf_aggregator.md b/docs/csaf_aggregator.md index 23577b70..2a9e4829 100644 --- a/docs/csaf_aggregator.md +++ b/docs/csaf_aggregator.md @@ -124,6 +124,7 @@ category update_interval create_service_document categories +ignorepattern ``` Where valid `name` and `domain` settings are required. @@ -205,6 +206,7 @@ insecure = true # If aggregator.category == "aggregator", set for an entry that should # be listed in addition: category = "lister" +# ignorepattern = [".*white.*", ".*red.*"] ``` From 873eb4879bec4c8f63ea52787a8600324e693684 Mon Sep 17 00:00:00 2001 From: "Sascha L. Teichmann" Date: Wed, 2 Aug 2023 20:01:04 +0200 Subject: [PATCH 5/6] Add time range to checker report. 
--- cmd/csaf_checker/config.go | 16 ++++------------ cmd/csaf_checker/processor.go | 13 +++++++------ cmd/csaf_checker/report.go | 8 +++++--- cmd/csaf_checker/tmpl/report.html | 22 ++++++++++++++++++++-- internal/models/models.go | 19 ++++++++++++++++++- 5 files changed, 54 insertions(+), 24 deletions(-) diff --git a/cmd/csaf_checker/config.go b/cmd/csaf_checker/config.go index aa456cc0..396df819 100644 --- a/cmd/csaf_checker/config.go +++ b/cmd/csaf_checker/config.go @@ -13,7 +13,6 @@ import ( "errors" "fmt" "net/http" - "time" "github.com/csaf-poc/csaf_distribution/v2/internal/filter" "github.com/csaf-poc/csaf_distribution/v2/internal/models" @@ -49,7 +48,7 @@ type config struct { Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"` clientCerts []tls.Certificate - ageAccept func(time.Time) bool + ageAccept *models.TimeRange ignorePattern filter.PatternMatcher } @@ -156,14 +155,6 @@ func (cfg *config) prepareCertificates() error { return nil } -// acceptYears returns a filter that accepts advisories from the last years. -func acceptYears(years uint) func(time.Time) bool { - good := time.Now().AddDate(-int(years), 0, 0) - return func(t time.Time) bool { - return !t.Before(good) - } -} - // prepareTimeRangeFilter sets up the filter in which time range // advisory should be considered for checking. 
func (cfg *config) prepareTimeRangeFilter() error { @@ -172,10 +163,11 @@ func (cfg *config) prepareTimeRangeFilter() error { return errors.New(`"timerange" and "years" are both configured: only one allowed`) case cfg.Years != nil: - cfg.ageAccept = acceptYears(*cfg.Years) + years := models.NYears(*cfg.Years) + cfg.ageAccept = &years case cfg.Range != nil: - cfg.ageAccept = cfg.Range.Contains + cfg.ageAccept = cfg.Range } return nil } diff --git a/cmd/csaf_checker/processor.go b/cmd/csaf_checker/processor.go index 4da23ad7..e6fecd8c 100644 --- a/cmd/csaf_checker/processor.go +++ b/cmd/csaf_checker/processor.go @@ -241,8 +241,9 @@ func (p *processor) clean() { func (p *processor) run(domains []string) (*Report, error) { report := Report{ - Date: ReportTime{Time: time.Now().UTC()}, - Version: util.SemVersion, + Date: ReportTime{Time: time.Now().UTC()}, + Version: util.SemVersion, + TimeRange: p.cfg.ageAccept, } for _, d := range domains { @@ -545,8 +546,8 @@ func (p *processor) rolieFeedEntries(feed string) ([]csaf.AdvisoryFile, error) { rfeed.Entries(func(entry *csaf.Entry) { // Filter if we have date checking. - if p.cfg.ageAccept != nil { - if pub := time.Time(entry.Published); !pub.IsZero() && !p.cfg.ageAccept(pub) { + if accept := p.cfg.ageAccept; accept != nil { + if pub := time.Time(entry.Published); !pub.IsZero() && !accept.Contains(pub) { return } } @@ -666,7 +667,7 @@ func (p *processor) integrity( if m := yearFromURL.FindStringSubmatch(u); m != nil { year, _ := strconv.Atoi(m[1]) // Check if we are in checking time interval. - if p.cfg.ageAccept != nil && !p.cfg.ageAccept( + if accept := p.cfg.ageAccept; accept != nil && !accept.Contains( time.Date( year, 12, 31, // Assume last day of year. 23, 59, 59, 0, // 23:59:59 @@ -972,7 +973,7 @@ func (p *processor) checkChanges(base string, mask whereType) error { return nil, nil, err } // Apply date range filtering. 
- if p.cfg.ageAccept != nil && !p.cfg.ageAccept(t) { + if accept := p.cfg.ageAccept; accept != nil && !accept.Contains(t) { continue } path := r[pathColumn] diff --git a/cmd/csaf_checker/report.go b/cmd/csaf_checker/report.go index a50c61ba..d8c46fc3 100644 --- a/cmd/csaf_checker/report.go +++ b/cmd/csaf_checker/report.go @@ -19,6 +19,7 @@ import ( "time" "github.com/csaf-poc/csaf_distribution/v2/csaf" + "github.com/csaf-poc/csaf_distribution/v2/internal/models" ) // MessageType is the kind of the message. @@ -60,9 +61,10 @@ type ReportTime struct{ time.Time } // Report is the overall report. type Report struct { - Domains []*Domain `json:"domains,omitempty"` - Version string `json:"version,omitempty"` - Date ReportTime `json:"date,omitempty"` + Domains []*Domain `json:"domains,omitempty"` + Version string `json:"version,omitempty"` + Date ReportTime `json:"date,omitempty"` + TimeRange *models.TimeRange `json:"timerange,omitempty"` } // MarshalText implements the encoding.TextMarshaller interface. diff --git a/cmd/csaf_checker/tmpl/report.html b/cmd/csaf_checker/tmpl/report.html index 29e83efb..36c3bfac 100644 --- a/cmd/csaf_checker/tmpl/report.html +++ b/cmd/csaf_checker/tmpl/report.html @@ -62,8 +62,26 @@

{{ .Name }}{{ if .HasErrors }} (failed){{ end }}

{{ end }}
- Date of run: - csaf_checker v{{ .Version }} +
+ Runtime + + + + + + {{ if .TimeRange }}{{ with .TimeRange }} + + + + + {{ end }}{{ end }} + + + + +
Date of run:
Time range: - +
Version:csaf_checker v{{ .Version }}
+
diff --git a/internal/models/models.go b/internal/models/models.go index a7f6b02a..caacd0f0 100644 --- a/internal/models/models.go +++ b/internal/models/models.go @@ -10,6 +10,7 @@ package models import ( + "encoding/json" "fmt" "strings" "time" @@ -27,10 +28,17 @@ func NewTimeInterval(a, b time.Time) TimeRange { return TimeRange{a, b} } +// NYears returns a time interval spanning the last years. +func NYears(years uint) TimeRange { + now := time.Now() + start := now.AddDate(-int(years), 0, 0) + return NewTimeInterval(start, now) +} + // guessDate tries to guess an RFC 3339 date time from a given string. func guessDate(s string) (time.Time, bool) { for _, layout := range []string{ - "2006-01-02T15:04:05Z07:00", + time.RFC3339, "2006-01-02T15:04:05", "2006-01-02T15:04", "2006-01-02T15", @@ -50,6 +58,15 @@ func (tr *TimeRange) UnmarshalText(text []byte) error { return tr.UnmarshalFlag(string(text)) } +// MarshalJSON implements [encoding/json.Marshaler]. +func (tr TimeRange) MarshalJSON() ([]byte, error) { + s := []string{ + tr[0].Format(time.RFC3339), + tr[1].Format(time.RFC3339), + } + return json.Marshal(s) +} + // UnmarshalFlag implements [go-flags/Unmarshaler]. func (tr *TimeRange) UnmarshalFlag(s string) error { s = strings.TrimSpace(s) From 7464ade6ae92161b479a33f5363b3cde58e4a278 Mon Sep 17 00:00:00 2001 From: "Sascha L. Teichmann" Date: Wed, 16 Aug 2023 20:29:25 +0200 Subject: [PATCH 6/6] Fix merge conflict oversight in doc. --- docs/csaf_checker.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/csaf_checker.md b/docs/csaf_checker.md index f253fccd..2a8a91ca 100644 --- a/docs/csaf_checker.md +++ b/docs/csaf_checker.md @@ -101,16 +101,11 @@ It is only allowed to specify one off them. All interval boundaries are inclusive. You can ignore certain advisories while checking by specifying a list -<<<<<<< HEAD -of regular expressions to match their URLs by using the `ignorepattern` option. -E.g. 
`-i='.*white.*' -i='*.red.*'` will ignore files which URLs contain the sub strings **white** or **red**. -======= of regular expressions to match their URLs by using the `ignorepattern` option. E.g. `-i='.*white.*' -i='*.red.*'` will ignore files which URLs contain -the sub strings **white** or **red**. ->>>>>>> main +the sub strings **white** or **red**. In the config file this has to be noted as: ``` ignorepattern = [".*white.*", ".*red.*"]