From dc9e06f596e00749a481a03c49733faf38d6f1b0 Mon Sep 17 00:00:00 2001 From: Massimiliano Giovagnoli Date: Sun, 13 Oct 2024 18:08:34 +0200 Subject: [PATCH] refactor(amazonlinux): share next gen pkg search logics Signed-off-by: Massimiliano Giovagnoli --- internal/utils/config.go | 8 ++ pkg/distro/amazonlinux/amazonlinux.go | 120 +++++++++++++++++++- pkg/distro/amazonlinux/v1/amazonlinux.go | 2 +- pkg/distro/amazonlinux/v2/amazonlinux.go | 2 +- pkg/distro/amazonlinux/v2022/amazonlinux.go | 113 ------------------ pkg/distro/amazonlinux/v2023/amazonlinux.go | 113 ------------------ pkg/distro/constants.go | 3 +- 7 files changed, 127 insertions(+), 234 deletions(-) diff --git a/internal/utils/config.go b/internal/utils/config.go index 38c7f98..1afeb13 100644 --- a/internal/utils/config.go +++ b/internal/utils/config.go @@ -57,6 +57,14 @@ func GetDistroConfigAndVarsFromViper(viper *v.Viper) (d.Config, error) { allsettings = amazonLinuxV2022.AllSettings() } + if amazonLinuxV2023 := distros.Sub(d.AmazonLinuxV2023Type); amazonLinuxV2023 != nil { + if err := amazonLinuxV2023.Unmarshal(&config); err != nil { + return d.Config{}, err + } + + allsettings = amazonLinuxV2023.AllSettings() + } + if debian := distros.Sub(d.DebianType); debian != nil { if err := debian.Unmarshal(&config); err != nil { return d.Config{}, err diff --git a/pkg/distro/amazonlinux/amazonlinux.go b/pkg/distro/amazonlinux/amazonlinux.go index 2006aab..e184f06 100644 --- a/pkg/distro/amazonlinux/amazonlinux.go +++ b/pkg/distro/amazonlinux/amazonlinux.go @@ -1,11 +1,16 @@ package amazonlinux import ( + "context" + "io" + "net/http" "net/url" + "strings" "github.com/maxgio92/krawler/pkg/distro" "github.com/maxgio92/krawler/pkg/output" p "github.com/maxgio92/krawler/pkg/packages" + "github.com/maxgio92/krawler/pkg/packages/rpm" "github.com/maxgio92/krawler/pkg/scrape" ) @@ -24,7 +29,7 @@ func (a *AmazonLinux) ConfigureCommon(def distro.Config, config distro.Config) e return nil } -// Returns the list of 
version-specific mirror URLs. +// BuildMirrorURLs returns the list of version-specific mirror URLs. func (a *AmazonLinux) BuildMirrorURLs(mirrors []p.Mirror, versions []distro.Version) ([]*url.URL, error) { versions, err := a.buildVersions(mirrors, versions) if err != nil { @@ -51,8 +56,8 @@ func (a *AmazonLinux) BuildMirrorURLs(mirrors []p.Mirror, versions []distro.Vers return nil, distro.ErrNoDistroVersionSpecified } -// Returns the list of repositories URLs. -func BuildRepositoriesURLs(roots []*url.URL, repositories []p.Repository) ([]*url.URL, error) { +// BuildRepositoryURLs returns the list of repository URLs. +func BuildRepositoryURLs(roots []*url.URL, repositories []p.Repository) ([]*url.URL, error) { var urls []*url.URL for _, root := range roots { @@ -74,7 +79,7 @@ func BuildRepositoriesURLs(roots []*url.URL, repositories []p.Repository) ([]*ur return urls, nil } -// Returns a list of distro versions, considering the user-provided configuration, +// buildVersions returns a list of distro versions, considering the user-provided configuration, // and if not, the ones available on configured mirrors. func (a *AmazonLinux) buildVersions(mirrors []p.Mirror, staticVersions []distro.Version) ([]distro.Version, error) { if staticVersions != nil { @@ -91,7 +96,7 @@ func (a *AmazonLinux) buildVersions(mirrors []p.Mirror, staticVersions []distro. return dynamicVersions, nil } -// Returns the list of the current available distro versions, by scraping +// crawlVersions returns the list of the currently available distro versions, by scraping // the specified mirrors, dynamically.
func (a *AmazonLinux) crawlVersions(mirrors []p.Mirror) ([]distro.Version, error) { versions := []distro.Version{} @@ -123,3 +128,108 @@ func (a *AmazonLinux) crawlVersions(mirrors []p.Mirror) ([]distro.Version, error return versions, nil } + +// SearchPackages scrapes each mirror, for each distro version, for each repository, +// for each architecture, and returns slice of Package and optionally an error. +func (a *AmazonLinux) SearchPackages(options p.SearchOptions) ([]p.Package, error) { + a.Config.Output.Logger = options.Log() + + // Build distribution version-specific mirror root URLs. + perVersionMirrorURLs, err := a.BuildMirrorURLs(a.Config.Mirrors, a.Config.Versions) + if err != nil { + return nil, err + } + + // Build available repository URLs based on provided configuration, + // for each distribution version. + repositoriesURLrefs, err := BuildRepositoryURLs(perVersionMirrorURLs, a.Config.Repositories) + if err != nil { + return nil, err + } + + // Dereference repository URLs. + repositoryURLs, err := a.dereferenceRepositoryURLs(repositoriesURLrefs, a.Config.Archs) + if err != nil { + return nil, err + } + + // Get RPM packages from each repository. 
+ rss := []string{} + for _, ru := range repositoryURLs { + rss = append(rss, ru.String()) + } + + searchOptions := rpm.NewSearchOptions(&options, a.Config.Archs, rss) + rpmPackages, err := rpm.SearchPackages(searchOptions) + if err != nil { + return nil, err + } + + return rpmPackages, nil +} + +func (a *AmazonLinux) dereferenceRepositoryURLs(repoURLs []*url.URL, archs []p.Architecture) ([]*url.URL, error) { + var urls []*url.URL + + for _, ar := range archs { + for _, v := range repoURLs { + r, err := a.dereferenceRepositoryURL(v, ar) + if err != nil { + return nil, err + } + + if r != nil { + urls = append(urls, r) + } + } + } + + return urls, nil +} + +func (a *AmazonLinux) dereferenceRepositoryURL(src *url.URL, arch p.Architecture) (*url.URL, error) { + var dest *url.URL + + mirrorListURL, err := url.JoinPath(src.String(), string(arch), "mirror.list") + if err != nil { + return nil, err + } + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, mirrorListURL, nil) + if err != nil { + return nil, err + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + a.Config.Output.Logger.Error("Amazon Linux repository URL not valid to be dereferenced") + //nolint:nilnil + return nil, nil + } + + if resp.Body == nil { + a.Config.Output.Logger.Error("empty response from Amazon Linux repository reference URL") + //nolint:nilnil + return nil, nil + } + + b, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + // Get first repository URL available, no matter what the geolocation.
+ s := strings.Split(string(b), "\n")[0] + + dest, err = url.Parse(s) + if err != nil { + return nil, err + } + + return dest, nil +} diff --git a/pkg/distro/amazonlinux/v1/amazonlinux.go b/pkg/distro/amazonlinux/v1/amazonlinux.go index 699f756..21dda46 100644 --- a/pkg/distro/amazonlinux/v1/amazonlinux.go +++ b/pkg/distro/amazonlinux/v1/amazonlinux.go @@ -34,7 +34,7 @@ func (a *AmazonLinux) SearchPackages(options packages.SearchOptions) ([]packages // Build available repository URLs based on provided configuration, // for each distribution version. - repositoriesURLrefs, err := common.BuildRepositoriesURLs(perVersionMirrorURLs, a.Config.Repositories) + repositoriesURLrefs, err := common.BuildRepositoryURLs(perVersionMirrorURLs, a.Config.Repositories) if err != nil { return nil, err } diff --git a/pkg/distro/amazonlinux/v2/amazonlinux.go b/pkg/distro/amazonlinux/v2/amazonlinux.go index 9d9faba..0e4a1b8 100644 --- a/pkg/distro/amazonlinux/v2/amazonlinux.go +++ b/pkg/distro/amazonlinux/v2/amazonlinux.go @@ -34,7 +34,7 @@ func (a *AmazonLinux) SearchPackages(options packages.SearchOptions) ([]packages // Build available repository URLs based on provided configuration, // for each distribution version. 
- repositoriesURLrefs, err := common.BuildRepositoriesURLs(perVersionMirrorURLs, a.Config.Repositories) + repositoriesURLrefs, err := common.BuildRepositoryURLs(perVersionMirrorURLs, a.Config.Repositories) if err != nil { return nil, err } diff --git a/pkg/distro/amazonlinux/v2022/amazonlinux.go b/pkg/distro/amazonlinux/v2022/amazonlinux.go index 02eaf3c..b9d31d2 100644 --- a/pkg/distro/amazonlinux/v2022/amazonlinux.go +++ b/pkg/distro/amazonlinux/v2022/amazonlinux.go @@ -1,16 +1,8 @@ package v2022 import ( - "context" - "io" - "net/http" - "net/url" - "strings" - "github.com/maxgio92/krawler/pkg/distro" common "github.com/maxgio92/krawler/pkg/distro/amazonlinux" - packages "github.com/maxgio92/krawler/pkg/packages" - "github.com/maxgio92/krawler/pkg/packages/rpm" ) type AmazonLinux struct { @@ -20,108 +12,3 @@ type AmazonLinux struct { func (a *AmazonLinux) Configure(config distro.Config) error { return a.ConfigureCommon(DefaultConfig, config) } - -// GetPackages scrapes each mirror, for each distro version, for each repository, -// for each architecture, and returns slice of Package and optionally an error. -func (a *AmazonLinux) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { - a.Config.Output.Logger = options.Log() - - // Build distribution version-specific mirror root URLs. - perVersionMirrorURLs, err := a.BuildMirrorURLs(a.Config.Mirrors, a.Config.Versions) - if err != nil { - return nil, err - } - - // Build available repository URLs based on provided configuration, - // for each distribution version. - repositoriesURLrefs, err := common.BuildRepositoriesURLs(perVersionMirrorURLs, a.Config.Repositories) - if err != nil { - return nil, err - } - - // Dereference repository URLs. - repositoryURLs, err := a.dereferenceRepositoryURLs(repositoriesURLrefs, a.Config.Archs) - if err != nil { - return nil, err - } - - // Get RPM packages from each repository. 
- rss := []string{} - for _, ru := range repositoryURLs { - rss = append(rss, ru.String()) - } - - searchOptions := rpm.NewSearchOptions(&options, a.Config.Archs, rss) - rpmPackages, err := rpm.SearchPackages(searchOptions) - if err != nil { - return nil, err - } - - return rpmPackages, nil -} - -func (a *AmazonLinux) dereferenceRepositoryURLs(repoURLs []*url.URL, archs []packages.Architecture) ([]*url.URL, error) { - var urls []*url.URL - - for _, ar := range archs { - for _, v := range repoURLs { - r, err := a.dereferenceRepositoryURL(v, ar) - if err != nil { - return nil, err - } - - if r != nil { - urls = append(urls, r) - } - } - } - - return urls, nil -} - -func (a *AmazonLinux) dereferenceRepositoryURL(src *url.URL, arch packages.Architecture) (*url.URL, error) { - var dest *url.URL - - mirrorListURL, err := url.JoinPath(src.String(), string(arch), "mirror.list") - if err != nil { - return nil, err - } - - req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, mirrorListURL, nil) - if err != nil { - return nil, err - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - a.Config.Output.Logger.Error("Amazon Linux v2022 repository URL not valid to be dereferenced") - //nolint:nilnil - return nil, nil - } - - if resp.Body == nil { - a.Config.Output.Logger.Error("empty response from Amazon Linux v2022 repository reference URL") - //nolint:nilnil - return nil, nil - } - - b, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - - // Get first repository URL available, no matter what the geolocation. 
- s := strings.Split(string(b), "\n")[0] - - dest, err = url.Parse(s) - if err != nil { - return nil, err - } - - return dest, nil -} diff --git a/pkg/distro/amazonlinux/v2023/amazonlinux.go b/pkg/distro/amazonlinux/v2023/amazonlinux.go index ea8eab4..f4631a4 100644 --- a/pkg/distro/amazonlinux/v2023/amazonlinux.go +++ b/pkg/distro/amazonlinux/v2023/amazonlinux.go @@ -1,16 +1,8 @@ package v2023 import ( - "context" - "io" - "net/http" - "net/url" - "strings" - "github.com/maxgio92/krawler/pkg/distro" common "github.com/maxgio92/krawler/pkg/distro/amazonlinux" - packages "github.com/maxgio92/krawler/pkg/packages" - "github.com/maxgio92/krawler/pkg/packages/rpm" ) type AmazonLinux struct { @@ -20,108 +12,3 @@ type AmazonLinux struct { func (a *AmazonLinux) Configure(config distro.Config) error { return a.ConfigureCommon(DefaultConfig, config) } - -// SearchPackages scrapes each mirror, for each distro version, for each repository, -// for each architecture, and returns slice of Package and optionally an error. -func (a *AmazonLinux) SearchPackages(options packages.SearchOptions) ([]packages.Package, error) { - a.Config.Output.Logger = options.Log() - - // Build distribution version-specific mirror root URLs. - perVersionMirrorURLs, err := a.BuildMirrorURLs(a.Config.Mirrors, a.Config.Versions) - if err != nil { - return nil, err - } - - // Build available repository URLs based on provided configuration, - // for each distribution version. - repositoriesURLrefs, err := common.BuildRepositoriesURLs(perVersionMirrorURLs, a.Config.Repositories) - if err != nil { - return nil, err - } - - // Dereference repository URLs. - repositoryURLs, err := a.dereferenceRepositoryURLs(repositoriesURLrefs, a.Config.Archs) - if err != nil { - return nil, err - } - - // Get RPM packages from each repository. 
- rss := []string{} - for _, ru := range repositoryURLs { - rss = append(rss, ru.String()) - } - - searchOptions := rpm.NewSearchOptions(&options, a.Config.Archs, rss) - rpmPackages, err := rpm.SearchPackages(searchOptions) - if err != nil { - return nil, err - } - - return rpmPackages, nil -} - -func (a *AmazonLinux) dereferenceRepositoryURLs(repoURLs []*url.URL, archs []packages.Architecture) ([]*url.URL, error) { - var urls []*url.URL - - for _, ar := range archs { - for _, v := range repoURLs { - r, err := a.dereferenceRepositoryURL(v, ar) - if err != nil { - return nil, err - } - - if r != nil { - urls = append(urls, r) - } - } - } - - return urls, nil -} - -func (a *AmazonLinux) dereferenceRepositoryURL(src *url.URL, arch packages.Architecture) (*url.URL, error) { - var dest *url.URL - - mirrorListURL, err := url.JoinPath(src.String(), string(arch), "mirror.list") - if err != nil { - return nil, err - } - - req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, mirrorListURL, nil) - if err != nil { - return nil, err - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - a.Config.Output.Logger.Error("Amazon Linux v2023 repository URL not valid to be dereferenced") - //nolint:nilnil - return nil, nil - } - - if resp.Body == nil { - a.Config.Output.Logger.Error("empty response from Amazon Linux v2023 repository reference URL") - //nolint:nilnil - return nil, nil - } - - b, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - - // Get first repository URL available, no matter what the geolocation. 
- s := strings.Split(string(b), "\n")[0] - - dest, err = url.Parse(s) - if err != nil { - return nil, err - } - - return dest, nil -} diff --git a/pkg/distro/constants.go b/pkg/distro/constants.go index a9a328e..d151c7d 100644 --- a/pkg/distro/constants.go +++ b/pkg/distro/constants.go @@ -5,12 +5,13 @@ import "github.com/maxgio92/krawler/pkg/packages" const ( X8664Arch packages.Architecture = "x86_64" - // Default architecture for which scrape for packages. + // DefaultArch is the default architecture for which to scrape packages. DefaultArch = X8664Arch CentosType = "centos" AmazonLinuxV1Type = "amazonlinux" AmazonLinuxV2Type = "amazonlinux2" AmazonLinuxV2022Type = "amazonlinux2022" + AmazonLinuxV2023Type = "amazonlinux2023" DebianType = "debian" UbuntuType = "ubuntu" FedoraType = "fedora"