Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(license): using common way for splitting licenses #4434

Merged
merged 6 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/fanal/analyzer/language/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func toApplication(fileType, filePath, libFilePath string, r dio.ReadSeekerAt, l
for _, lib := range libs {
var licenses []string
if lib.License != "" {
licenses = strings.Split(lib.License, ",")
licenses = licensing.SplitLicenses(lib.License)
for i, license := range licenses {
licenses[i] = licensing.Normalize(strings.TrimSpace(license))
}
Expand Down
10 changes: 1 addition & 9 deletions pkg/fanal/analyzer/pkg/dpkg/copyright.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ var (
dpkgLicenseAnalyzerVersion = 1

commonLicenseReferenceRegexp = regexp.MustCompile(`/?usr/share/common-licenses/([0-9A-Za-z_.+-]+[0-9A-Za-z+])`)
licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)")
)

// dpkgLicenseAnalyzer parses copyright files and detect licenses
Expand Down Expand Up @@ -90,14 +89,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens

l = normalizeLicense(l)
if len(l) > 0 {
// Split licenses without considering "and"/"or"
// examples:
// 'GPL-1+,GPL-2' => {"GPL-1", "GPL-2"}
// 'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1", "Artistic", "Artistic-dist"}
// 'LGPLv3+_or_GPLv2+' => {"LGPLv3", "GPLv2"}
// 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"}
// 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}
for _, lic := range licenseSplitRegexp.Split(l, -1) {
for _, lic := range licensing.SplitLicenses(l) {
lic = licensing.Normalize(lic)
if !slices.Contains(licenses, lic) {
licenses = append(licenses, lic)
Expand Down
26 changes: 26 additions & 0 deletions pkg/licensing/normalize.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package licensing

import (
"regexp"
"strings"
)

Expand Down Expand Up @@ -80,9 +81,34 @@ var mapping = map[string]string{
"PUBLIC DOMAIN": Unlicense,
}

// License strings are split without distinguishing "and" from "or".
// Expected results of splitting (see SplitLicenses):
//
//	'GPL-1+,GPL-2' => {"GPL-1+", "GPL-2"}
//	'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1+", "Artistic", "Artistic-dist"}
//	'LGPLv3+_or_GPLv2+' => {"LGPLv3+", "GPLv2+"}
//	'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"}
//	'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}
//	'BSD 3-Clause License or Apache License, Version 2.0' => {"BSD 3-Clause License", "Apache License, Version 2.0"}
//
// The last example is not produced by the pattern alone: the pattern cuts at
// the comma, and SplitLicenses re-attaches the "Version ..." piece.
//
// Previous form of the pattern, kept for reference (it accepted only a single
// separator character after "and"):
// var LicenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)")

// licenseSplitRegexp matches one separator between license names. A separator
// is either the word "or"/"and" surrounded by one or more spaces or
// underscores and optionally preceded by a comma, or a comma followed by any
// number of spaces.
var licenseSplitRegexp = regexp.MustCompile(`(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)`)

// Normalize returns the canonical license identifier for name.
//
// The name is upper-cased and looked up in mapping. When an entry exists its
// value is returned; otherwise name is returned exactly as it was passed in.
func Normalize(name string) string {
	canonical, known := mapping[strings.ToUpper(name)]
	if !known {
		return name
	}
	return canonical
}

// SplitLicenses splits a free-form license string into individual license
// names.
//
// The string is cut at every match of licenseSplitRegexp (commas and the
// words "or"/"and"). A piece that starts with "ver " or "version "
// (case-insensitive) is treated as the version suffix of the preceding name
// and is re-attached to it with ", ", so "Apache License, Version 2.0" stays a
// single entry.
//
// Pieces that are empty or contain only whitespace are skipped. They arise
// from an empty input or from leading, trailing, or doubled separators, and
// would otherwise surface as blank license names. Non-blank pieces are
// returned unmodified (not trimmed). If no names are found the result is nil.
func SplitLicenses(str string) []string {
	var licenses []string
	for _, piece := range licenseSplitRegexp.Split(str, -1) {
		// Regexp.Split yields "" around adjacent/edge separators and for "".
		if strings.TrimSpace(piece) == "" {
			continue
		}

		lower := strings.ToLower(piece)
		isVersionSuffix := strings.HasPrefix(lower, "ver ") || strings.HasPrefix(lower, "version ")

		// A version suffix needs a preceding license to attach to; if there is
		// none, it is kept as a name of its own.
		if isVersionSuffix && len(licenses) > 0 {
			licenses[len(licenses)-1] += ", " + piece
			continue
		}
		licenses = append(licenses, piece)
	}
	return licenses
}
67 changes: 67 additions & 0 deletions pkg/licensing/normalize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package licensing_test

import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/aquasecurity/trivy/pkg/licensing"
)

// TestSplitLicenses checks licensing.SplitLicenses against a table of license
// strings using comma, "or", "and" and mixed separators, including names that
// carry a ", Version x.y" suffix and a name that merely starts with "ver".
func TestSplitLicenses(t *testing.T) {
	// Every case has a distinct name so each subtest can be selected with
	// -run and is reported without a numeric disambiguation suffix.
	tests := []struct {
		name     string
		license  string
		licenses []string
	}{
		{
			name:     "two licenses comma-separated",
			license:  "GPL-1+,GPL-2",
			licenses: []string{"GPL-1+", "GPL-2"},
		},
		{
			name:     "three licenses comma-separated",
			license:  "GPL-1+,GPL-2,GPL-3",
			licenses: []string{"GPL-1+", "GPL-2", "GPL-3"},
		},
		{
			name:     "3 licenses 'or'-separated",
			license:  "GPL-1+ or Artistic or Artistic-dist",
			licenses: []string{"GPL-1+", "Artistic", "Artistic-dist"},
		},
		{
			name:     "two licenses _or_ separated",
			license:  "LGPLv3+_or_GPLv2+",
			licenses: []string{"LGPLv3+", "GPLv2+"},
		},
		{
			name:     "licenses `and`-separated",
			license:  "BSD-3-CLAUSE and GPL-2",
			licenses: []string{"BSD-3-CLAUSE", "GPL-2"},
		},
		{
			name:     "three licenses and/or separated",
			license:  "GPL-1+ or Artistic, and BSD-4-clause-POWERDOG",
			licenses: []string{"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"},
		},
		{
			name:     "two licenses with version",
			license:  "Apache License,Version 2.0, OSET Public License version 2.1",
			licenses: []string{"Apache License, Version 2.0", "OSET Public License version 2.1"},
		},
		{
			// "verbatim" must not be mistaken for a "ver "/"version " suffix.
			name:     "the license starts with `ver`",
			license:  "verbatim and BSD-4-clause",
			licenses: []string{"verbatim", "BSD-4-clause"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			res := licensing.SplitLicenses(tt.license)
			assert.Equal(t, tt.licenses, res)
		})
	}
}