Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Downloader: ignore advisories by given patterns #419

Merged
merged 12 commits into from
Aug 16, 2023
Merged
31 changes: 24 additions & 7 deletions cmd/csaf_downloader/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package main
import (
"net/http"

"github.com/csaf-poc/csaf_distribution/v2/internal/filter"
"github.com/csaf-poc/csaf_distribution/v2/internal/models"
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
)
Expand All @@ -29,6 +30,8 @@ type config struct {
Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"`
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`
Folder string `long:"folder" short:"f" description:"Download into a given FOLDER" value-name:"FOLDER" toml:"folder"`
IgnorePattern []string `long:"ignorepattern" short:"i" description:"Dont download files if there URLs match any of the given PATTERNs" value-name:"PATTERN" toml:"ignorepattern"`

ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`

Expand All @@ -37,6 +40,8 @@ type config struct {
RemoteValidatorPresets []string `long:"validatorpreset" description:"One or more PRESETS to validate remotely" value-name:"PRESETS" toml:"validatorpreset"`

Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"`

ignorePattern filter.PatternMatcher
}

// configPaths are the potential file locations of the config file.
Expand All @@ -50,11 +55,9 @@ var configPaths = []string{
func parseArgsConfig() ([]string, *config, error) {
p := options.Parser[config]{
DefaultConfigLocations: configPaths,
ConfigLocation: func(cfg *config) string {
return cfg.Config
},
Usage: "[OPTIONS] domain...",
HasVersion: func(cfg *config) bool { return cfg.Version },
ConfigLocation: func(cfg *config) string { return cfg.Config },
Usage: "[OPTIONS] domain...",
HasVersion: func(cfg *config) bool { return cfg.Version },
SetDefaults: func(cfg *config) {
cfg.Worker = defaultWorker
cfg.RemoteValidatorPresets = []string{defaultPreset}
Expand All @@ -72,8 +75,22 @@ func parseArgsConfig() ([]string, *config, error) {
return p.Parse()
}

// ignoreURL returns true if the given URL should not be downloaded,
// i.e. if the ignore pattern matcher stored in the configuration
// matches the URL.
func (cfg *config) ignoreURL(u string) bool {
return cfg.ignorePattern.Matches(u)
}

// compileIgnorePatterns compiles the configured ignore patterns
// into a matcher and stores it in the configuration. If compiling
// fails the error is returned and the stored matcher is left
// untouched.
func (cfg *config) compileIgnorePatterns() error {
	matcher, err := filter.NewPatternMatcher(cfg.IgnorePattern)
	if err == nil {
		cfg.ignorePattern = matcher
	}
	return err
}

// prepare prepares internal state of a loaded configuration.
// At present this means compiling the configured ignore patterns;
// the error of that step, if any, is returned to the caller.
func (cfg *config) prepare() error {
	return cfg.compileIgnorePatterns()
}
16 changes: 15 additions & 1 deletion cmd/csaf_downloader/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,13 @@ nextAdvisory:
continue
}

if d.cfg.ignoreURL(file.URL()) {
if d.cfg.Verbose {
log.Printf("Ignoring %q.\n", file.URL())
}
continue
}

resp, err := client.Get(file.URL())
if err != nil {
log.Printf("WARN: cannot get '%s': %v\n", file.URL(), err)
Expand Down Expand Up @@ -474,8 +481,15 @@ nextAdvisory:
initialReleaseDate = initialReleaseDate.UTC()

// Write advisory to file
newDir := path.Join(d.directory, lower)

// Do we have a configured destination folder?
if d.cfg.Folder != "" {
newDir = path.Join(newDir, d.cfg.Folder)
} else {
newDir = path.Join(newDir, strconv.Itoa(initialReleaseDate.Year()))
}

newDir := path.Join(d.directory, lower, strconv.Itoa(initialReleaseDate.Year()))
if newDir != lastDir {
if err := d.mkdirAll(newDir, 0755); err != nil {
errorCh <- err
Expand Down
18 changes: 17 additions & 1 deletion docs/csaf_downloader.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Application Options:
-r, --rate= The average upper limit of https operations per second (defaults to unlimited)
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
-t, --timerange=RANGE RANGE of time from which advisories to download
-f, --folder=FOLDER Download into a given FOLDER
-i, --ignorepattern=PATTERN Dont download files if there URLs match any of the given PATTERNs
-H, --header= One or more extra HTTP header fields
--validator=URL URL to validate documents remotely
--validatorcache=FILE FILE to cache remote validations
Expand Down Expand Up @@ -51,11 +53,13 @@ ignoresigcheck = false
verbose = false
# rate # set to unlimited
worker = 2
# timerange # not set by default
# folder # not set by default
# ignorepattern # not set by default
# header # not set by default
# validator # not set by default
# validatorcache # not set by default
validatorpreset = ["mandatory"]
# timerange # not set by default
```

The `timerange` parameter enables downloading advisories whose last change falls
Expand Down Expand Up @@ -86,3 +90,15 @@ into a given intervall. There are three possible notations:
spans an interval from 1st January 2019 to the 1st January of 2024.

All interval boundaries are inclusive.

If the `folder` option is given the advisories are stored in this folder.
If it is omitted (the default) the advisories are stored in folders named by the
year they are from.

You can ignore certain advisories while downloading by specifying a list
of regular expressions to match their URLs by using the `ignorepattern` option.
E.g. `-i='.*white.*' -i='.*red.*'` will ignore files whose URLs contain the substrings **white** or **red**.
In the config file this has to be noted as:
```
ignorepattern = [".*white.*", ".*red.*"]
```
42 changes: 42 additions & 0 deletions internal/filter/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// This file is Free Software under the MIT License
// without warranty, see README.md and LICENSES/MIT.txt for details.
//
// SPDX-License-Identifier: MIT
//
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>

// Package filter implements helpers to filter advisories.
package filter

import (
"fmt"
"regexp"
)

// PatternMatcher holds a set of compiled regular expressions that
// are tried one after another when matching a string.
type PatternMatcher []*regexp.Regexp

// NewPatternMatcher builds a PatternMatcher by compiling every
// entry of patterns as a regular expression. The first pattern
// that fails to compile aborts the construction and is reported
// as a wrapped error; no matcher is returned in that case.
func NewPatternMatcher(patterns []string) (PatternMatcher, error) {
	compiled := make(PatternMatcher, len(patterns))
	for i, raw := range patterns {
		re, err := regexp.Compile(raw)
		if err != nil {
			return nil, fmt.Errorf("invalid ignore pattern: %w", err)
		}
		compiled[i] = re
	}
	return compiled, nil
}

// Matches reports whether s is matched by at least one of the
// expressions in pm. A matcher without expressions never matches.
func (pm PatternMatcher) Matches(s string) bool {
	for i := range pm {
		if pm[i].MatchString(s) {
			return true
		}
	}
	return false
}
Loading