Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Downloader: ignore advisories by given patterns #419

Merged
merged 12 commits into from
Aug 16, 2023
Merged
32 changes: 25 additions & 7 deletions cmd/csaf_downloader/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package main
import (
"net/http"

"github.com/csaf-poc/csaf_distribution/v2/internal/filter"
"github.com/csaf-poc/csaf_distribution/v2/internal/models"
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
)
Expand All @@ -30,6 +31,9 @@ type config struct {
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`
Folder string `long:"folder" short:"f" description:"Download into a given subFOLDER" value-name:"FOLDER" toml:"folder"`
IgnorePattern []string `long:"ignorepattern" short:"i" description:"Dont download files if there URLs match any of the given PATTERNs" value-name:"PATTERN" toml:"ignorepattern"`



ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`

Expand All @@ -38,6 +42,8 @@ type config struct {
RemoteValidatorPresets []string `long:"validatorpreset" description:"One or more PRESETS to validate remotely" value-name:"PRESETS" toml:"validatorpreset"`

Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"`

ignorePattern filter.PatternMatcher
}

// configPaths are the potential file locations of the config file.
Expand All @@ -51,11 +57,9 @@ var configPaths = []string{
func parseArgsConfig() ([]string, *config, error) {
p := options.Parser[config]{
DefaultConfigLocations: configPaths,
ConfigLocation: func(cfg *config) string {
return cfg.Config
},
Usage: "[OPTIONS] domain...",
HasVersion: func(cfg *config) bool { return cfg.Version },
ConfigLocation: func(cfg *config) string { return cfg.Config },
Usage: "[OPTIONS] domain...",
HasVersion: func(cfg *config) bool { return cfg.Version },
SetDefaults: func(cfg *config) {
cfg.Worker = defaultWorker
cfg.RemoteValidatorPresets = []string{defaultPreset}
Expand All @@ -73,8 +77,22 @@ func parseArgsConfig() ([]string, *config, error) {
return p.Parse()
}

// ignoreURL reports whether the URL u is matched by the ignore patterns
// held in cfg.ignorePattern, i.e. whether it should not be downloaded.
func (cfg *config) ignoreURL(u string) bool {
	skip := cfg.ignorePattern.Matches(u)
	return skip
}

// compileIgnorePatterns turns the configured IgnorePattern strings into
// a pattern matcher and stores it in cfg.ignorePattern.
// If building the matcher fails the error is handed back and the
// previously stored matcher is left untouched.
func (cfg *config) compileIgnorePatterns() error {
	matcher, err := filter.NewPatternMatcher(cfg.IgnorePattern)
	if err == nil {
		cfg.ignorePattern = matcher
	}
	return err
}

// prepare prepares internal state of a loaded configuration.
// At present this means compiling the configured ignore patterns;
// any error from that step is returned to the caller.
func (cfg *config) prepare() error {
	// No early "return nil" here: it would make the pattern compilation
	// unreachable and leave cfg.ignorePattern unset.
	return cfg.compileIgnorePatterns()
}
7 changes: 7 additions & 0 deletions cmd/csaf_downloader/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,13 @@ nextAdvisory:
continue
}

if d.cfg.ignoreURL(file.URL()) {
if d.cfg.Verbose {
log.Printf("Ignoring %q.\n", file.URL())
}
continue
}

resp, err := client.Get(file.URL())
if err != nil {
log.Printf("WARN: cannot get '%s': %v\n", file.URL(), err)
Expand Down
17 changes: 14 additions & 3 deletions docs/csaf_downloader.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Application Options:
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
-t, --timerange=RANGE RANGE of time from which advisories to download
-f, --folder=FOLDER Download all into a given subFOLDER
-i, --ignorepattern=PATTERN Dont download files if there URLs match any of the given PATTERNs
-H, --header= One or more extra HTTP header fields
--validator=URL URL to validate documents remotely
--validatorcache=FILE FILE to cache remote validations
Expand Down Expand Up @@ -54,6 +55,7 @@ verbose = false
worker = 2
# timerange # not set by default
# folder # not set by default
# ignorepattern # not set by default
# header # not set by default
# validator # not set by default
# validatorcache # not set by default
Expand Down Expand Up @@ -89,6 +91,15 @@ into a given intervall. There are three possible notations:

All interval boundaries are inclusive.

If the `folder` option is given all the advisories are stored in a subfolder
of this name. Otherwise the advisories are each stored in a folder named
by the year they are from.
If the `folder` option is given the advisories are all stored in a subfolder
of this name.
If it is omitted (the default), the advisories are stored in folders named by the
year they are from.

You can ignore certain advisories while downloading by specifying a list
of regular expressions to match their URLs by using the `ignorepattern` option.
E.g. `-i='.*white.*' -i='.*red.*'` will ignore files whose URLs contain the substrings **white** or **red**.
In the config file this has to be noted as:
```
ignorepattern = [".*white.*", ".*red.*"]
```
42 changes: 42 additions & 0 deletions internal/filter/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// This file is Free Software under the MIT License
// without warranty, see README.md and LICENSES/MIT.txt for details.
//
// SPDX-License-Identifier: MIT
//
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>

// Package filter implements helpers to filter advisories.
package filter

import (
"fmt"
"regexp"
)

// PatternMatcher is an ordered collection of compiled regular expressions.
type PatternMatcher []*regexp.Regexp

// NewPatternMatcher compiles every string in patterns and returns the
// resulting expressions in the same order.
// The first pattern that fails to compile aborts the construction; in
// that case a nil matcher and the wrapped compile error are returned.
func NewPatternMatcher(patterns []string) (PatternMatcher, error) {
	compiled := make(PatternMatcher, len(patterns))
	for i := range patterns {
		re, err := regexp.Compile(patterns[i])
		if err != nil {
			return nil, fmt.Errorf("invalid ignore pattern: %w", err)
		}
		compiled[i] = re
	}
	return compiled, nil
}

// Matches reports whether at least one of the expressions matches s.
// A matcher without expressions never matches.
func (pm PatternMatcher) Matches(s string) bool {
	for i := 0; i < len(pm); i++ {
		if pm[i].MatchString(s) {
			return true
		}
	}
	return false
}
Loading