From c775d251eda6fa567de95e55e4558d5b99abce39 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 21 Aug 2024 10:22:35 +0200 Subject: [PATCH] Improves detection of PyPI package names in environment dependencies (#1699) ## Changes Improves detection of PyPi package names in environment dependencies ## Tests Added unit tests --- bundle/libraries/local_path.go | 22 ++++++++++++++++++---- bundle/libraries/local_path_test.go | 9 +++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 3e32adfde2..417bce10e3 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -3,6 +3,7 @@ package libraries import ( "net/url" "path" + "regexp" "strings" ) @@ -65,14 +66,27 @@ func IsLibraryLocal(dep string) bool { return IsLocalPath(dep) } +// ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). +// \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. +// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ +var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$`) + func isPackage(name string) bool { - // If the dependency has ==, it's a package with version - if strings.Contains(name, "==") { + if packageRegex.MatchString(name) { return true } - // If the dependency has no extension, it's a PyPi package name - return path.Ext(name) == "" + return isUrlBasedLookup(name) +} + +func isUrlBasedLookup(name string) bool { + parts := strings.Split(name, " @ ") + if len(parts) != 2 { + return false + } + + return packageRegex.MatchString(parts[0]) && isRemoteStorageScheme(parts[1]) } func isRemoteStorageScheme(path string) bool { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 7299cdc934..7f84b32444 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -54,7 +54,16 @@ func TestIsLibraryLocal(t *testing.T) { {path: "-r /Workspace/my_project/requirements.txt", expected: false}, {path: "s3://mybucket/path/to/package", expected: false}, {path: "dbfs:/mnt/path/to/package", expected: false}, + {path: "beautifulsoup4", expected: false}, {path: "beautifulsoup4==4.12.3", expected: false}, + {path: "beautifulsoup4 >= 4.12.3", expected: false}, + {path: "beautifulsoup4 < 4.12.3", expected: false}, + {path: "beautifulsoup4 ~= 4.12.3", expected: false}, + {path: "beautifulsoup4[security, tests]", expected: false}, + {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, + {path: "https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, + {path: "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, + {path: "requests [security] @ https://github.com/psf/requests/archive/refs/heads/main.zip", expected: false}, } for i, tc := range testCases {