Skip to content

Commit

Permalink
Merge pull request #419 from csaf-poc/download-ignore-pattern
Browse files Browse the repository at this point in the history
Downloader: ignore advisories by given patterns
  • Loading branch information
JanHoefelmeyer authored Aug 16, 2023
2 parents 8c95795 + a4d7bea commit 2d1dc18
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 9 deletions.
33 changes: 24 additions & 9 deletions cmd/csaf_downloader/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package main
import (
"net/http"

"github.com/csaf-poc/csaf_distribution/v2/internal/filter"
"github.com/csaf-poc/csaf_distribution/v2/internal/models"
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
)
Expand All @@ -30,14 +31,16 @@ type config struct {
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`
Folder string `long:"folder" short:"f" description:"Download into a given subFOLDER" value-name:"FOLDER" toml:"folder"`

ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`
IgnorePattern []string `long:"ignorepattern" short:"i" description:"Do not download files if their URLs match any of the given PATTERNs" value-name:"PATTERN" toml:"ignorepattern"`
ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`

RemoteValidator string `long:"validator" description:"URL to validate documents remotely" value-name:"URL" toml:"validator"`
RemoteValidatorCache string `long:"validatorcache" description:"FILE to cache remote validations" value-name:"FILE" toml:"validatorcache"`
RemoteValidatorPresets []string `long:"validatorpreset" description:"One or more PRESETS to validate remotely" value-name:"PRESETS" toml:"validatorpreset"`

Config string `short:"c" long:"config" description:"Path to config TOML file" value-name:"TOML-FILE" toml:"-"`

ignorePattern filter.PatternMatcher
}

// configPaths are the potential file locations of the config file.
Expand All @@ -51,11 +54,9 @@ var configPaths = []string{
func parseArgsConfig() ([]string, *config, error) {
p := options.Parser[config]{
DefaultConfigLocations: configPaths,
ConfigLocation: func(cfg *config) string {
return cfg.Config
},
Usage: "[OPTIONS] domain...",
HasVersion: func(cfg *config) bool { return cfg.Version },
ConfigLocation: func(cfg *config) string { return cfg.Config },
Usage: "[OPTIONS] domain...",
HasVersion: func(cfg *config) bool { return cfg.Version },
SetDefaults: func(cfg *config) {
cfg.Worker = defaultWorker
cfg.RemoteValidatorPresets = []string{defaultPreset}
Expand All @@ -73,8 +74,22 @@ func parseArgsConfig() ([]string, *config, error) {
return p.Parse()
}

// ignoreURL reports whether the given URL is matched by the compiled
// ignore patterns of the configuration, i.e. whether it should not be
// downloaded.
func (cfg *config) ignoreURL(u string) bool {
	ignored := cfg.ignorePattern.Matches(u)
	return ignored
}

// compileIgnorePatterns compiles the configured ignore patterns and, on
// success, stores the resulting matcher in cfg.ignorePattern. If a
// pattern cannot be compiled the error is returned and the previously
// stored matcher is left untouched.
func (cfg *config) compileIgnorePatterns() error {
	matcher, err := filter.NewPatternMatcher(cfg.IgnorePattern)
	if err == nil {
		cfg.ignorePattern = matcher
	}
	return err
}

// prepare prepares internal state of a loaded configuration.
func (cfg *config) prepare() error {
// TODO: Implement me!
return nil
return cfg.compileIgnorePatterns()
}
7 changes: 7 additions & 0 deletions cmd/csaf_downloader/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,13 @@ nextAdvisory:
continue
}

if d.cfg.ignoreURL(file.URL()) {
if d.cfg.Verbose {
log.Printf("Ignoring %q.\n", file.URL())
}
continue
}

resp, err := client.Get(file.URL())
if err != nil {
log.Printf("WARN: cannot get '%s': %v\n", file.URL(), err)
Expand Down
13 changes: 13 additions & 0 deletions docs/csaf_downloader.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Application Options:
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
-t, --timerange=RANGE RANGE of time from which advisories to download
-f, --folder=FOLDER Download all into a given subFOLDER
-i, --ignorepattern=PATTERN Do not download files if their URLs match any of the given PATTERNs
-H, --header= One or more extra HTTP header fields
--validator=URL URL to validate documents remotely
--validatorcache=FILE FILE to cache remote validations
Expand Down Expand Up @@ -54,6 +55,7 @@ verbose = false
worker = 2
# timerange # not set by default
# folder # not set by default
# ignorepattern # not set by default
# header # not set by default
# validator # not set by default
# validatorcache # not set by default
Expand Down Expand Up @@ -92,3 +94,14 @@ All interval boundaries are inclusive.
If the `folder` option is given all the advisories are stored in a subfolder
of this name. Otherwise the advisories are each stored in a folder named
by the year they are from.

You can ignore certain advisories while downloading by specifying a list
of regular expressions to match their URLs by using the `ignorepattern`
option.

E.g. `-i='.*white.*' -i='.*red.*'` will ignore files whose URLs contain
the substrings **white** or **red**.
In the config file this has to be noted as:
```
ignorepattern = [".*white.*", ".*red.*"]
```
42 changes: 42 additions & 0 deletions internal/filter/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// This file is Free Software under the MIT License
// without warranty, see README.md and LICENSES/MIT.txt for details.
//
// SPDX-License-Identifier: MIT
//
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>

// Package filter implements helpers to filter advisories.
package filter

import (
"fmt"
"regexp"
)

// PatternMatcher is an ordered collection of compiled regular expressions.
type PatternMatcher []*regexp.Regexp

// NewPatternMatcher compiles every string in patterns into a regular
// expression and returns the results as a PatternMatcher.
// Compilation stops at the first pattern that fails to compile; in that
// case a nil matcher and a wrapped error are returned.
func NewPatternMatcher(patterns []string) (PatternMatcher, error) {
	compiled := make(PatternMatcher, len(patterns))
	for i := range patterns {
		re, err := regexp.Compile(patterns[i])
		if err != nil {
			return nil, fmt.Errorf("invalid ignore pattern: %w", err)
		}
		compiled[i] = re
	}
	return compiled, nil
}

// Matches reports whether s is matched by at least one of the
// expressions in pm. A matcher without expressions matches nothing.
func (pm PatternMatcher) Matches(s string) bool {
	for i := range pm {
		if pm[i].MatchString(s) {
			return true
		}
	}
	return false
}

0 comments on commit 2d1dc18

Please sign in to comment.