From faf9df58b180458690e7ea9426ad7b8bb5da2259 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 13 Feb 2025 09:53:39 +0100 Subject: [PATCH 1/6] acc: add a helper to diff with replacements diff.py is like "diff -r -U2" but it applies replacements first to the argument. This allows comparing different output files and directories but ignore differences that are going to be replaced by placeholders. This is useful for tests that record large amount of files, specifically "bundle init" with standard templates. In those tests, changing one parameter results in a small diff so recording the full directory is not helpful, because it's hard to see what changed there. I'm using it in implementation of serverless mode for templates that need it: #2348 Related small changes: add [TESTROOT] replacement for absolute path to acceptance directory in git repo. Add $TESTDIR env var for absolute path to a given test in git repo. --- acceptance/acceptance_test.go | 20 ++++++ acceptance/bin/diff.py | 62 +++++++++++++++++++ acceptance/config_test.go | 3 + acceptance/selftest/diff/out_dir_a/output.txt | 7 +++ acceptance/selftest/diff/out_dir_b/output.txt | 7 +++ acceptance/selftest/diff/output.txt | 13 ++++ acceptance/selftest/diff/script | 17 +++++ acceptance/selftest/diff/test.toml | 1 + 8 files changed, 130 insertions(+) create mode 100755 acceptance/bin/diff.py create mode 100644 acceptance/selftest/diff/out_dir_a/output.txt create mode 100644 acceptance/selftest/diff/out_dir_b/output.txt create mode 100644 acceptance/selftest/diff/output.txt create mode 100644 acceptance/selftest/diff/script create mode 100644 acceptance/selftest/diff/test.toml diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index d99ad29914..7d10bcdae1 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -56,6 +56,7 @@ const ( EntryPointScript = "script" CleanupScript = "script.cleanup" PrepareScript = "script.prepare" + ReplsFile = "repls.json" MaxFileSize = 100_000 ) @@ -65,6 +66,10 @@ var Scripts = map[string]bool{ PrepareScript: true, } +var Ignored = map[string]bool{ + ReplsFile: true, +} + func TestAccept(t *testing.T) { testAccept(t, InprocessMode, SingleTest) } @@ -152,6 +157,8 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { testdiff.PrepareReplacementSdkVersion(t, &repls) testdiff.PrepareReplacementsGoVersion(t, &repls) + repls.SetPath(cwd, "[TESTROOT]") + repls.Repls = append(repls.Repls, testdiff.Replacement{Old: regexp.MustCompile("dbapi[0-9a-f]+"), New: "[DATABRICKS_TOKEN]"}) testDirs := getTests(t) @@ -310,6 +317,12 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont // User replacements come last: repls.Repls = append(repls.Repls, config.Repls...) + if config.SaveRepls { + replsJson, err := json.MarshalIndent(repls.Repls, "", " ") + require.NoError(t, err) + testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson)) + } + if coverDir != "" { // Creating individual coverage directory for each test, because writing to the same one // results in sporadic failures like this one (only if tests are running in parallel): @@ -320,6 +333,10 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont cmd.Env = append(cmd.Env, "GOCOVERDIR="+coverDir) } + absDir, err := filepath.Abs(dir) + require.NoError(t, err) + cmd.Env = append(cmd.Env, "TESTDIR="+absDir) + // Write combined output to a file out, err := os.Create(filepath.Join(tmpDir, "output.txt")) require.NoError(t, err) @@ -368,6 +385,9 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont if _, ok := outputs[relPath]; ok { continue } + if _, ok := Ignored[relPath]; ok { + continue + } unexpected = append(unexpected, relPath) if strings.HasPrefix(relPath, "out") { // We have a new file starting with "out" diff --git a/acceptance/bin/diff.py b/acceptance/bin/diff.py new file mode 100755 index 0000000000..7065cdbdef --- /dev/null +++ b/acceptance/bin/diff.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +"""This script implements "diff -r -U2 dir1 dir2" but it applies replacements from repls.json to files found in dir2.""" + +import sys +import os +import difflib +import json +import re + + +def replaceAll(patterns, s): + for comp, new in patterns: + s = comp.sub(new, s) + return s + + +def main(): + d1, d2 = sys.argv[1:] + + with open("repls.json") as f: + repls = json.load(f) + + patterns = [] + for r in repls: + try: + c = re.compile(r["Old"]) + patterns.append((c, r["New"])) + except re.error as e: + print(f"Regex error for pattern {r}: {e}", file=sys.stderr) + + files1 = [] + for root, dirs, fs in os.walk(d1): + for f in fs: + files1.append(os.path.relpath(os.path.join(root, f), d1)) + + files2 = [] + for root, dirs, fs in os.walk(d2): + for f in fs: + files2.append(os.path.relpath(os.path.join(root, f), d2)) + + set1 = set(files1) + set2 = set(files2) + + for f in sorted(set1 | set2): + p1 = os.path.join(d1, f) + p2 = os.path.join(d2, f) + if f not in set2: + print(f"Only in {d1}: {f}") + elif f not in set1: + print(f"Only in {d2}: {f}") + else: + a = [replaceAll(patterns, x) for x in open(p1).readlines()] + b = [replaceAll(patterns, x) for x in open(p2).readlines()] + if a != b: + p1 = p1.replace("\\", "/") + p2 = p2.replace("\\", "/") + for line in difflib.unified_diff(a, b, p1, p2, "", "", 2): + print(line, end="") + + +if __name__ == '__main__': + main() diff --git a/acceptance/config_test.go b/acceptance/config_test.go index ec0d1baeea..6686cdc7ac 100644 --- a/acceptance/config_test.go +++ b/acceptance/config_test.go @@ -27,6 +27,9 @@ type TestConfig struct { // If true, do not run this test against cloud environment LocalOnly bool + // if true, save file repls.json with all the replacemnts + SaveRepls bool + // List of additional replacements to apply on this test. // Old is a regexp, New is a replacement expression. Repls []testdiff.Replacement diff --git a/acceptance/selftest/diff/out_dir_a/output.txt b/acceptance/selftest/diff/out_dir_a/output.txt new file mode 100644 index 0000000000..303c1867b4 --- /dev/null +++ b/acceptance/selftest/diff/out_dir_a/output.txt @@ -0,0 +1,7 @@ +Hello! +{ + "id": "[USERID]", + "userName": "[USERNAME]" +} + +Footer \ No newline at end of file diff --git a/acceptance/selftest/diff/out_dir_b/output.txt b/acceptance/selftest/diff/out_dir_b/output.txt new file mode 100644 index 0000000000..f4f01af139 --- /dev/null +++ b/acceptance/selftest/diff/out_dir_b/output.txt @@ -0,0 +1,7 @@ +Hello! +{ + "id": "[UUID]", + "userName": "[USERNAME]" +} + +Footer \ No newline at end of file diff --git a/acceptance/selftest/diff/output.txt b/acceptance/selftest/diff/output.txt new file mode 100644 index 0000000000..aef99f1e38 --- /dev/null +++ b/acceptance/selftest/diff/output.txt @@ -0,0 +1,13 @@ + +>>> diff.py out_dir_a out_dir_b +Only in out_dir_a: only_in_a +Only in out_dir_b: only_in_b +--- out_dir_a/output.txt ++++ out_dir_b/output.txt +@@ -1,5 +1,5 @@ + Hello! + { +- "id": "[USERID]", ++ "id": "[UUID]", + "userName": "[USERNAME]" + } diff --git a/acceptance/selftest/diff/script b/acceptance/selftest/diff/script new file mode 100644 index 0000000000..a7b8706e6b --- /dev/null +++ b/acceptance/selftest/diff/script @@ -0,0 +1,17 @@ +mkdir out_dir_a +mkdir out_dir_b + +touch out_dir_a/only_in_a +touch out_dir_b/only_in_b + +echo Hello! >> out_dir_a/output.txt +echo Hello! >> out_dir_b/output.txt + +curl -s $DATABRICKS_HOST/api/2.0/preview/scim/v2/Me >> out_dir_a/output.txt +printf "\n\nFooter" >> out_dir_a/output.txt +printf '{\n "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt + +# Unlike regular diff, diff.py will apply replacements first before doing the comparison +errcode trace diff.py out_dir_a out_dir_b + +rm out_dir_a/only_in_a out_dir_b/only_in_b diff --git a/acceptance/selftest/diff/test.toml b/acceptance/selftest/diff/test.toml new file mode 100644 index 0000000000..d5712aea53 --- /dev/null +++ b/acceptance/selftest/diff/test.toml @@ -0,0 +1 @@ +SaveRepls = true From 9651bcbeb7a63391bfe9ffc689a1a9195811c89f Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 13 Feb 2025 10:16:28 +0100 Subject: [PATCH 2/6] pathlib --- acceptance/bin/diff.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/acceptance/bin/diff.py b/acceptance/bin/diff.py index 7065cdbdef..84769012a1 100755 --- a/acceptance/bin/diff.py +++ b/acceptance/bin/diff.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 -"""This script implements "diff -r -U2 dir1 dir2" but it applies replacements from repls.json to files found in dir2.""" +"""This script implements "diff -r -U2 dir1 dir2" but applies replacements first""" import sys -import os import difflib import json import re +from pathlib import Path def replaceAll(patterns, s): @@ -16,8 +16,9 @@ def replaceAll(patterns, s): def main(): d1, d2 = sys.argv[1:] + d1, d2 = Path(d1), Path(d2) - with open("repls.json") as f: + with open("repls.json") as f: # Must have 'SaveRepls = true' in test.toml repls = json.load(f) patterns = [] @@ -28,33 +29,26 @@ def main(): except re.error as e: print(f"Regex error for pattern {r}: {e}", file=sys.stderr) - files1 = [] - for root, dirs, fs in os.walk(d1): - for f in fs: - files1.append(os.path.relpath(os.path.join(root, f), d1)) - - files2 = [] - for root, dirs, fs in os.walk(d2): - for f in fs: - files2.append(os.path.relpath(os.path.join(root, f), d2)) + files1 = [str(p.relative_to(d1)) for p in d1.rglob("*") if p.is_file()] + files2 = [str(p.relative_to(d2)) for p in d2.rglob("*") if p.is_file()] set1 = set(files1) set2 = set(files2) for f in sorted(set1 | set2): - p1 = os.path.join(d1, f) - p2 = os.path.join(d2, f) + p1 = d1 / f + p2 = d2 / f if f not in set2: print(f"Only in {d1}: {f}") elif f not in set1: print(f"Only in {d2}: {f}") else: - a = [replaceAll(patterns, x) for x in open(p1).readlines()] - b = [replaceAll(patterns, x) for x in open(p2).readlines()] + a = [replaceAll(patterns, x) for x in p1.read_text().splitlines(True)] + b = [replaceAll(patterns, x) for x in p2.read_text().splitlines(True)] if a != b: - p1 = p1.replace("\\", "/") - p2 = p2.replace("\\", "/") - for line in difflib.unified_diff(a, b, p1, p2, "", "", 2): + p1_str = p1.as_posix() + p2_str = p2.as_posix() + for line in difflib.unified_diff(a, b, p1_str, p2_str, "", "", 2): print(line, end="") From bca4e6ce2fbadd6da34a9cab14ac293abd3a9f64 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 13 Feb 2025 10:26:53 +0100 Subject: [PATCH 3/6] ruff --- acceptance/bin/diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/bin/diff.py b/acceptance/bin/diff.py index 84769012a1..5c3387f98b 100755 --- a/acceptance/bin/diff.py +++ b/acceptance/bin/diff.py @@ -52,5 +52,5 @@ def main(): print(line, end="") -if __name__ == '__main__': +if __name__ == "__main__": main() From 8db21ba4b5e56bddb8337da9423a677baccfc6db Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Thu, 13 Feb 2025 11:03:31 +0100 Subject: [PATCH 4/6] Disable selftest on cloud --- acceptance/selftest/test.toml | 1 + 1 file changed, 1 insertion(+) create mode 100644 acceptance/selftest/test.toml diff --git a/acceptance/selftest/test.toml b/acceptance/selftest/test.toml new file mode 100644 index 0000000000..b76e712fbc --- /dev/null +++ b/acceptance/selftest/test.toml @@ -0,0 +1 @@ +LocalOnly = true From b04d8051338db82d7d5d34d90f143efd969e008e Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 14 Feb 2025 10:56:09 +0100 Subject: [PATCH 5/6] add a comment --- acceptance/acceptance_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index 7d10bcdae1..c40ac74d5f 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -56,8 +56,9 @@ const ( EntryPointScript = "script" CleanupScript = "script.cleanup" PrepareScript = "script.prepare" - ReplsFile = "repls.json" MaxFileSize = 100_000 + // Filename to save replacements if SaveRepls is set to true (used by diff.py) + ReplsFile = "repls.json" ) var Scripts = map[string]bool{ From c8b4196fbcb20fa19202737f7bda3b6de4258297 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 14 Feb 2025 11:28:56 +0100 Subject: [PATCH 6/6] get rid of SaveRepls config setting --- acceptance/acceptance_test.go | 11 +++++------ acceptance/bin/diff.py | 2 +- acceptance/config_test.go | 3 --- acceptance/selftest/diff/test.toml | 1 - 4 files changed, 6 insertions(+), 11 deletions(-) delete mode 100644 acceptance/selftest/diff/test.toml diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index c40ac74d5f..c7b1151abb 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -57,7 +57,7 @@ const ( CleanupScript = "script.cleanup" PrepareScript = "script.prepare" MaxFileSize = 100_000 - // Filename to save replacements if SaveRepls is set to true (used by diff.py) + // Filename to save replacements to (used by diff.py) ReplsFile = "repls.json" ) @@ -318,11 +318,10 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont // User replacements come last: repls.Repls = append(repls.Repls, config.Repls...) - if config.SaveRepls { - replsJson, err := json.MarshalIndent(repls.Repls, "", " ") - require.NoError(t, err) - testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson)) - } + // Save replacements to temp test directory so that it can be read by diff.py + replsJson, err := json.MarshalIndent(repls.Repls, "", " ") + require.NoError(t, err) + testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson)) if coverDir != "" { // Creating individual coverage directory for each test, because writing to the same one diff --git a/acceptance/bin/diff.py b/acceptance/bin/diff.py index 5c3387f98b..0a91d57ce3 100755 --- a/acceptance/bin/diff.py +++ b/acceptance/bin/diff.py @@ -18,7 +18,7 @@ def main(): d1, d2 = sys.argv[1:] d1, d2 = Path(d1), Path(d2) - with open("repls.json") as f: # Must have 'SaveRepls = true' in test.toml + with open("repls.json") as f: repls = json.load(f) patterns = [] diff --git a/acceptance/config_test.go b/acceptance/config_test.go index 6686cdc7ac..ec0d1baeea 100644 --- a/acceptance/config_test.go +++ b/acceptance/config_test.go @@ -27,9 +27,6 @@ type TestConfig struct { // If true, do not run this test against cloud environment LocalOnly bool - // if true, save file repls.json with all the replacemnts - SaveRepls bool - // List of additional replacements to apply on this test. // Old is a regexp, New is a replacement expression. Repls []testdiff.Replacement diff --git a/acceptance/selftest/diff/test.toml b/acceptance/selftest/diff/test.toml deleted file mode 100644 index d5712aea53..0000000000 --- a/acceptance/selftest/diff/test.toml +++ /dev/null @@ -1 +0,0 @@ -SaveRepls = true