From 9bad6b822a73ca2b5d296480fea6efc2ad02d4eb Mon Sep 17 00:00:00 2001 From: afdesk Date: Thu, 18 May 2023 20:10:44 +0600 Subject: [PATCH 1/5] fix(license): using common way for splitting licenses --- pkg/fanal/analyzer/language/analyze.go | 2 +- pkg/fanal/analyzer/pkg/dpkg/copyright.go | 10 +--- pkg/licensing/normalize.go | 12 +++++ pkg/licensing/normalize_test.go | 58 ++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 pkg/licensing/normalize_test.go diff --git a/pkg/fanal/analyzer/language/analyze.go b/pkg/fanal/analyzer/language/analyze.go index 1d8c40f58771..1ec23c7f126b 100644 --- a/pkg/fanal/analyzer/language/analyze.go +++ b/pkg/fanal/analyzer/language/analyze.go @@ -92,7 +92,7 @@ func toApplication(fileType, filePath, libFilePath string, r dio.ReadSeekerAt, l for _, lib := range libs { var licenses []string if lib.License != "" { - licenses = strings.Split(lib.License, ",") + licenses = licensing.LicenseSplitRegexp.Split(lib.License, -1) for i, license := range licenses { licenses[i] = licensing.Normalize(strings.TrimSpace(license)) } diff --git a/pkg/fanal/analyzer/pkg/dpkg/copyright.go b/pkg/fanal/analyzer/pkg/dpkg/copyright.go index 4b8006e42bbb..f3ecebdb5076 100644 --- a/pkg/fanal/analyzer/pkg/dpkg/copyright.go +++ b/pkg/fanal/analyzer/pkg/dpkg/copyright.go @@ -27,7 +27,6 @@ var ( dpkgLicenseAnalyzerVersion = 1 commonLicenseReferenceRegexp = regexp.MustCompile(`/?usr/share/common-licenses/([0-9A-Za-z_.+-]+[0-9A-Za-z+])`) - licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)") ) // dpkgLicenseAnalyzer parses copyright files and detect licenses @@ -90,14 +89,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens l = normalizeLicense(l) if len(l) > 0 { - // Split licenses without considering "and"/"or" - // examples: - // 'GPL-1+,GPL-2' => {"GPL-1", "GPL-2"} - // 'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1", "Artistic", "Artistic-dist"} - // 'LGPLv3+_or_GPLv2+' => {"LGPLv3", "GPLv2"} - // 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"} - // 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"} - for _, lic := range licenseSplitRegexp.Split(l, -1) { + for _, lic := range licensing.LicenseSplitRegexp.Split(l, -1) { lic = licensing.Normalize(lic) if !slices.Contains(licenses, lic) { licenses = append(licenses, lic) diff --git a/pkg/licensing/normalize.go b/pkg/licensing/normalize.go index 5e25fc89ca13..625f03af34ac 100644 --- a/pkg/licensing/normalize.go +++ b/pkg/licensing/normalize.go @@ -1,6 +1,7 @@ package licensing import ( + "regexp" "strings" ) @@ -80,6 +81,17 @@ var mapping = map[string]string{ "PUBLIC DOMAIN": Unlicense, } +// Split licenses without considering "and"/"or" +// examples: +// 'GPL-1+,GPL-2' => {"GPL-1+", "GPL-2"} +// 'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1+", "Artistic", "Artistic-dist"} +// 'LGPLv3+_or_GPLv2+' => {"LGPLv3+", "GPLv2"} +// 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"} +// 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"} +// var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)") + +var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)") + func Normalize(name string) string { if l, ok := mapping[strings.ToUpper(name)]; ok { return l diff --git a/pkg/licensing/normalize_test.go b/pkg/licensing/normalize_test.go new file mode 100644 index 000000000000..8e0eb73d04a7 --- /dev/null +++ b/pkg/licensing/normalize_test.go @@ -0,0 +1,58 @@ +package licensing_test + +import ( + "github.com/stretchr/testify/assert" + "testing" + + "github.com/aquasecurity/trivy/pkg/licensing" +) + +func TestLicenseSplitRegexp(t *testing.T) { + tests := []struct { + name string + license string + licenses []string + }{ + { + "simple list commad-separated", + "GPL-1+,GPL-2", + []string{"GPL-1+", "GPL-2"}, + }, + { + "3 licenses 'or'-separated", + "GPL-1+ or Artistic or Artistic-dist", + []string{"GPL-1+", "Artistic", "Artistic-dist"}, + }, + // ' + { + "two licenses _or_ separated", + "LGPLv3+_or_GPLv2+", + []string{"LGPLv3+", "GPLv2+"}, + }, + // ' + { + "licenses `and`-separated", + "BSD-3-CLAUSE and GPL-2", + []string{"BSD-3-CLAUSE", "GPL-2"}, + }, + { + "three licenses and/or separated", + "GPL-1+ or Artistic, and BSD-4-clause-POWERDOG", + []string{"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}, + }, + /* + { + "two licenses with version", + "BSD 3-Clause License or Apache License, Version 2.0", + []string{"BSD 3-Clause License", "Apache License, Version 2.0"}, + }, + */ + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + res := licensing.LicenseSplitRegexp.Split(tt.license, -1) + assert.Equal(t, tt.licenses, res) + }) + } +} From 9c47246a65c9fbad93ac5379d63b68c348b3be1c Mon Sep 17 00:00:00 2001 From: afdesk Date: Tue, 20 Jun 2023 20:14:58 +0600 Subject: [PATCH 2/5] add test cases --- pkg/licensing/normalize_test.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/licensing/normalize_test.go b/pkg/licensing/normalize_test.go index 8e0eb73d04a7..03abb64bb824 100644 --- a/pkg/licensing/normalize_test.go +++ b/pkg/licensing/normalize_test.go @@ -18,6 +18,11 @@ func TestLicenseSplitRegexp(t *testing.T) { "GPL-1+,GPL-2", []string{"GPL-1+", "GPL-2"}, }, + { + "simple list commad-separated", + "GPL-1+,GPL-2,GPL-3", + []string{"GPL-1+", "GPL-2", "GPL-3"}, + }, { "3 licenses 'or'-separated", "GPL-1+ or Artistic or Artistic-dist", @@ -40,13 +45,11 @@ func TestLicenseSplitRegexp(t *testing.T) { "GPL-1+ or Artistic, and BSD-4-clause-POWERDOG", []string{"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}, }, - /* - { - "two licenses with version", - "BSD 3-Clause License or Apache License, Version 2.0", - []string{"BSD 3-Clause License", "Apache License, Version 2.0"}, - }, - */ + { + "two licenses with version", + "BSD 3-Clause License or Apache License, Version 2.0", + []string{"BSD 3-Clause License", "Apache License, Version 2.0"}, + }, } for _, tt := range tests { From 8623d7f8e45e318c82f390d244fac3c53c7cc16e Mon Sep 17 00:00:00 2001 From: afdesk Date: Tue, 20 Jun 2023 20:16:02 +0600 Subject: [PATCH 3/5] TEST new regex --- pkg/licensing/normalize.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/licensing/normalize.go b/pkg/licensing/normalize.go index 625f03af34ac..0e4d04545ec1 100644 --- a/pkg/licensing/normalize.go +++ b/pkg/licensing/normalize.go @@ -88,9 +88,10 @@ var mapping = map[string]string{ // 'LGPLv3+_or_GPLv2+' => {"LGPLv3+", "GPLv2"} // 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"} // 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"} +// 'BSD 3-Clause License or Apache License, Version 2.0' => {"BSD 3-Clause License", "Apache License, Version 2.0"} // var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)") -var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)") +var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ]+)|(!.*,[ _]*Version[ _].*)(,[ ]*)") func Normalize(name string) string { if l, ok := mapping[strings.ToUpper(name)]; ok { From ac46b32a26121cb25b0034525d99e4190da59e85 Mon Sep 17 00:00:00 2001 From: Nikita Pivkin Date: Tue, 27 Jun 2023 11:15:36 +0600 Subject: [PATCH 4/5] extract function --- pkg/fanal/analyzer/language/analyze.go | 2 +- pkg/fanal/analyzer/pkg/dpkg/copyright.go | 2 +- pkg/licensing/normalize.go | 15 ++++++++++++++- pkg/licensing/normalize_test.go | 15 ++++++++------- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/pkg/fanal/analyzer/language/analyze.go b/pkg/fanal/analyzer/language/analyze.go index 1ec23c7f126b..be1a37f2e616 100644 --- a/pkg/fanal/analyzer/language/analyze.go +++ b/pkg/fanal/analyzer/language/analyze.go @@ -92,7 +92,7 @@ func toApplication(fileType, filePath, libFilePath string, r dio.ReadSeekerAt, l for _, lib := range libs { var licenses []string if lib.License != "" { - licenses = licensing.LicenseSplitRegexp.Split(lib.License, -1) + licenses = licensing.SplitLicenses(lib.License) for i, license := range licenses { licenses[i] = licensing.Normalize(strings.TrimSpace(license)) } diff --git a/pkg/fanal/analyzer/pkg/dpkg/copyright.go b/pkg/fanal/analyzer/pkg/dpkg/copyright.go index f3ecebdb5076..9b962b8c3e49 100644 --- a/pkg/fanal/analyzer/pkg/dpkg/copyright.go +++ b/pkg/fanal/analyzer/pkg/dpkg/copyright.go @@ -89,7 +89,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens l = normalizeLicense(l) if len(l) > 0 { - for _, lic := range licensing.LicenseSplitRegexp.Split(l, -1) { + for _, lic := range licensing.SplitLicenses(l) { lic = licensing.Normalize(lic) if !slices.Contains(licenses, lic) { licenses = append(licenses, lic) diff --git a/pkg/licensing/normalize.go b/pkg/licensing/normalize.go index 0e4d04545ec1..527f66fcc769 100644 --- a/pkg/licensing/normalize.go +++ b/pkg/licensing/normalize.go @@ -91,7 +91,7 @@ var mapping = map[string]string{ // 'BSD 3-Clause License or Apache License, Version 2.0' => {"BSD 3-Clause License", "Apache License, Version 2.0"} // var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)") -var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ]+)|(!.*,[ _]*Version[ _].*)(,[ ]*)") +var licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)") func Normalize(name string) string { if l, ok := mapping[strings.ToUpper(name)]; ok { @@ -99,3 +99,16 @@ func Normalize(name string) string { } return name } + +func SplitLicenses(str string) []string { + var licenses []string + for _, maybeLic := range licenseSplitRegexp.Split(str, -1) { + // no version starts with "ver" https://spdx.org/licenses/ + if strings.HasPrefix(strings.ToLower(maybeLic), "ver") { + licenses[len(licenses)-1] += ", " + maybeLic + } else { + licenses = append(licenses, maybeLic) + } + } + return licenses +} diff --git a/pkg/licensing/normalize_test.go b/pkg/licensing/normalize_test.go index 03abb64bb824..899fd485d78d 100644 --- a/pkg/licensing/normalize_test.go +++ b/pkg/licensing/normalize_test.go @@ -1,25 +1,26 @@ package licensing_test import ( - "github.com/stretchr/testify/assert" "testing" + "github.com/stretchr/testify/assert" + "github.com/aquasecurity/trivy/pkg/licensing" ) -func TestLicenseSplitRegexp(t *testing.T) { +func TestSplitLicenses(t *testing.T) { tests := []struct { name string license string licenses []string }{ { - "simple list commad-separated", + "simple list comma-separated", "GPL-1+,GPL-2", []string{"GPL-1+", "GPL-2"}, }, { - "simple list commad-separated", + "simple list comma-separated", "GPL-1+,GPL-2,GPL-3", []string{"GPL-1+", "GPL-2", "GPL-3"}, }, @@ -47,14 +48,14 @@ func TestLicenseSplitRegexp(t *testing.T) { }, { "two licenses with version", - "BSD 3-Clause License or Apache License, Version 2.0", - []string{"BSD 3-Clause License", "Apache License, Version 2.0"}, + "Apache License,Version 2.0, OSET Public License version 2.1", + []string{"Apache License, Version 2.0", "OSET Public License version 2.1"}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - res := licensing.LicenseSplitRegexp.Split(tt.license, -1) + res := licensing.SplitLicenses(tt.license) assert.Equal(t, tt.licenses, res) }) } From 3b06089d1d2d1504a47bfa699c8ab5f3333221d2 Mon Sep 17 00:00:00 2001 From: Nikita Pivkin Date: Tue, 27 Jun 2023 13:27:56 +0600 Subject: [PATCH 5/5] fix version detection --- pkg/licensing/normalize.go | 4 ++-- pkg/licensing/normalize_test.go | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pkg/licensing/normalize.go b/pkg/licensing/normalize.go index 527f66fcc769..38956108c4fc 100644 --- a/pkg/licensing/normalize.go +++ b/pkg/licensing/normalize.go @@ -103,8 +103,8 @@ func Normalize(name string) string { func SplitLicenses(str string) []string { var licenses []string for _, maybeLic := range licenseSplitRegexp.Split(str, -1) { - // no version starts with "ver" https://spdx.org/licenses/ - if strings.HasPrefix(strings.ToLower(maybeLic), "ver") { + lower := strings.ToLower(maybeLic) + if (strings.HasPrefix(lower, "ver ") || strings.HasPrefix(lower, "version ")) && len(licenses) > 0 { licenses[len(licenses)-1] += ", " + maybeLic } else { licenses = append(licenses, maybeLic) diff --git a/pkg/licensing/normalize_test.go b/pkg/licensing/normalize_test.go index 899fd485d78d..a13cbed5d863 100644 --- a/pkg/licensing/normalize_test.go +++ b/pkg/licensing/normalize_test.go @@ -51,6 +51,11 @@ func TestSplitLicenses(t *testing.T) { "Apache License,Version 2.0, OSET Public License version 2.1", []string{"Apache License, Version 2.0", "OSET Public License version 2.1"}, }, + { + "the license starts with `ver`", + "verbatim and BSD-4-clause", + []string{"verbatim", "BSD-4-clause"}, + }, } for _, tt := range tests {