From 9161e5cd9569c01f28310b18075ee342def782ae Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 20 Aug 2024 15:10:33 +0200 Subject: [PATCH 1/4] Improves detection of PyPi package names in environment dependencies --- bundle/libraries/local_path.go | 15 ++++++++------- bundle/libraries/local_path_test.go | 6 ++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 3e32adfde2..08da5f2ef4 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -3,6 +3,7 @@ package libraries import ( "net/url" "path" + "regexp" "strings" ) @@ -65,14 +66,14 @@ func IsLibraryLocal(dep string) bool { return IsLocalPath(dep) } -func isPackage(name string) bool { - // If the dependency has ==, it's a package with version - if strings.Contains(name, "==") { - return true - } +// ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). +// \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. +// ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +const packageRegex = `^[a-zA-Z0-9\-_]+(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$` - // If the dependency has no extension, it's a PyPi package name - return path.Ext(name) == "" +func isPackage(name string) bool { + re := regexp.MustCompile(packageRegex) + return re.MatchString(name) } func isRemoteStorageScheme(path string) bool { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 7299cdc934..5b6d3dc314 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -54,7 +54,13 @@ func TestIsLibraryLocal(t *testing.T) { {path: "-r /Workspace/my_project/requirements.txt", expected: false}, {path: "s3://mybucket/path/to/package", expected: false}, {path: "dbfs:/mnt/path/to/package", expected: false}, + {path: "beautifulsoup4", expected: false}, {path: "beautifulsoup4==4.12.3", expected: false}, + {path: "beautifulsoup4 >= 4.12.3", expected: false}, + {path: "beautifulsoup4 < 4.12.3", expected: false}, + {path: "beautifulsoup4 ~= 4.12.3", expected: false}, + {path: "beautifulsoup4[security, tests]", expected: false}, + {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, } for i, tc := range testCases { From 40e1f831f46900d10ed711d9bd363a2683c7f677 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 20 Aug 2024 15:14:40 +0200 Subject: [PATCH 2/4] added few more test cases --- bundle/libraries/local_path.go | 1 + bundle/libraries/local_path_test.go | 1 + 2 files changed, 2 insertions(+) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 08da5f2ef4..ffdb20f1dd 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -69,6 +69,7 @@ func IsLibraryLocal(dep string) bool { // ^[a-zA-Z0-9\-_]+: Matches the package name, allowing alphanumeric characters, dashes (-), and underscores (_). // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. // ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). +// Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ const packageRegex = `^[a-zA-Z0-9\-_]+(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$` func isPackage(name string) bool { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 5b6d3dc314..53997dc56f 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -61,6 +61,7 @@ func TestIsLibraryLocal(t *testing.T) { {path: "beautifulsoup4 ~= 4.12.3", expected: false}, {path: "beautifulsoup4[security, tests]", expected: false}, {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, + {path: "https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, } for i, tc := range testCases { From 28e1bed54393b4dc021a2bc831d0fceefc41e185 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 20 Aug 2024 15:26:41 +0200 Subject: [PATCH 3/4] support url lookups --- bundle/libraries/local_path.go | 19 +++++++++++++++++-- bundle/libraries/local_path_test.go | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index ffdb20f1dd..33c2e3a9da 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -70,11 +70,26 @@ func IsLibraryLocal(dep string) bool { // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. // ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). // Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ -const packageRegex = `^[a-zA-Z0-9\-_]+(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$` +const packageRegex = `^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$` func isPackage(name string) bool { re := regexp.MustCompile(packageRegex) - return re.MatchString(name) + if re.MatchString(name) { + return true + } + + return isUrlBasedLookup(name) +} + +func isUrlBasedLookup(name string) bool { + parts := strings.Split(name, " @ ") + if len(parts) != 2 { + return false + } + + packageRe := regexp.MustCompile(packageRegex) + urlRe := regexp.MustCompile(`^https?://`) + return packageRe.MatchString(parts[0]) && urlRe.MatchString(parts[1]) } func isRemoteStorageScheme(path string) bool { diff --git a/bundle/libraries/local_path_test.go b/bundle/libraries/local_path_test.go index 53997dc56f..7f84b32444 100644 --- a/bundle/libraries/local_path_test.go +++ b/bundle/libraries/local_path_test.go @@ -62,6 +62,8 @@ func TestIsLibraryLocal(t *testing.T) { {path: "beautifulsoup4[security, tests]", expected: false}, {path: "beautifulsoup4[security, tests] ~= 4.12.3", expected: false}, {path: "https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, + {path: "pip @ https://github.com/pypa/pip/archive/22.0.2.zip", expected: false}, + {path: "requests [security] @ https://github.com/psf/requests/archive/refs/heads/main.zip", expected: false}, } for i, tc := range testCases { From 974525e34f4e7a43d00e3b28342d0effd3115b1f Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Wed, 21 Aug 2024 10:14:34 +0200 Subject: [PATCH 4/4] fixes --- bundle/libraries/local_path.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/bundle/libraries/local_path.go b/bundle/libraries/local_path.go index 33c2e3a9da..417bce10e3 100644 --- a/bundle/libraries/local_path.go +++ b/bundle/libraries/local_path.go @@ -70,11 +70,10 @@ func IsLibraryLocal(dep string) bool { // \[.*\])?: Optionally matches any extras specified in square brackets, e.g., [security]. // ((==|!=|<=|>=|~=|>|<)\d+(\.\d+){0,2}(\.\*)?)?: Optionally matches version specifiers, supporting various operators (==, !=, etc.) followed by a version number (e.g., 2.25.1). // Spec for package name and version specifier: https://pip.pypa.io/en/stable/reference/requirement-specifiers/ -const packageRegex = `^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$` +var packageRegex = regexp.MustCompile(`^[a-zA-Z0-9\-_]+\s?(\[.*\])?\s?((==|!=|<=|>=|~=|==|>|<)\s?\d+(\.\d+){0,2}(\.\*)?)?$`) func isPackage(name string) bool { - re := regexp.MustCompile(packageRegex) - if re.MatchString(name) { + if packageRegex.MatchString(name) { return true } @@ -87,9 +86,7 @@ func isUrlBasedLookup(name string) bool { return false } - packageRe := regexp.MustCompile(packageRegex) - urlRe := regexp.MustCompile(`^https?://`) - return packageRe.MatchString(parts[0]) && urlRe.MatchString(parts[1]) + return packageRegex.MatchString(parts[0]) && isRemoteStorageScheme(parts[1]) } func isRemoteStorageScheme(path string) bool {