-
Notifications
You must be signed in to change notification settings - Fork 154
acc: add a helper to diff with replacements #2352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -57,6 +57,8 @@ const ( | |
| CleanupScript = "script.cleanup" | ||
| PrepareScript = "script.prepare" | ||
| MaxFileSize = 100_000 | ||
| // Filename to save replacements to (used by diff.py) | ||
| ReplsFile = "repls.json" | ||
| ) | ||
|
|
||
| var Scripts = map[string]bool{ | ||
|
|
@@ -65,6 +67,10 @@ var Scripts = map[string]bool{ | |
| PrepareScript: true, | ||
| } | ||
|
|
||
| var Ignored = map[string]bool{ | ||
| ReplsFile: true, | ||
| } | ||
|
|
||
| func TestAccept(t *testing.T) { | ||
| testAccept(t, InprocessMode, SingleTest) | ||
| } | ||
|
|
@@ -152,6 +158,8 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { | |
| testdiff.PrepareReplacementSdkVersion(t, &repls) | ||
| testdiff.PrepareReplacementsGoVersion(t, &repls) | ||
|
|
||
| repls.SetPath(cwd, "[TESTROOT]") | ||
|
|
||
| repls.Repls = append(repls.Repls, testdiff.Replacement{Old: regexp.MustCompile("dbapi[0-9a-f]+"), New: "[DATABRICKS_TOKEN]"}) | ||
|
|
||
| testDirs := getTests(t) | ||
|
|
@@ -310,6 +318,11 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont | |
| // User replacements come last: | ||
| repls.Repls = append(repls.Repls, config.Repls...) | ||
|
|
||
| // Save replacements to temp test directory so that it can be read by diff.py | ||
| replsJson, err := json.MarshalIndent(repls.Repls, "", " ") | ||
| require.NoError(t, err) | ||
| testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson)) | ||
|
|
||
| if coverDir != "" { | ||
| // Creating individual coverage directory for each test, because writing to the same one | ||
| // results in sporadic failures like this one (only if tests are running in parallel): | ||
|
|
@@ -320,6 +333,10 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont | |
| cmd.Env = append(cmd.Env, "GOCOVERDIR="+coverDir) | ||
| } | ||
|
|
||
| absDir, err := filepath.Abs(dir) | ||
| require.NoError(t, err) | ||
| cmd.Env = append(cmd.Env, "TESTDIR="+absDir) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This passes on Windows CI and it's also not the only directory we have, so judging from practice, no. Unless you have a Windows machine where it does not work without quotes?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As long as it passes on Windows CI it's fine I guess |
||
|
|
||
| // Write combined output to a file | ||
| out, err := os.Create(filepath.Join(tmpDir, "output.txt")) | ||
| require.NoError(t, err) | ||
|
|
@@ -368,6 +385,9 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont | |
| if _, ok := outputs[relPath]; ok { | ||
| continue | ||
| } | ||
| if _, ok := Ignored[relPath]; ok { | ||
| continue | ||
| } | ||
| unexpected = append(unexpected, relPath) | ||
| if strings.HasPrefix(relPath, "out") { | ||
| // We have a new file starting with "out" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| #!/usr/bin/env python3 | ||
| """This script implements "diff -r -U2 dir1 dir2" but applies replacements first""" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you have a chance to measure performance difference between running this test with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand -- We can bench diff.py against diff but even if it's 2x slower there is no action to take, because we use diff.py for handling replacements. That's said I don't think diffing will be a bottleneck in your tests, so it makes sense to prefer
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This relates to discussion in the other thread about potentially replacing the output without storing repl.json file and using
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But since it's not really possible to use |
||
|
|
||
| import sys | ||
| import difflib | ||
| import json | ||
| import re | ||
| from pathlib import Path | ||
|
|
||
|
|
||
| def replaceAll(patterns, s): | ||
| for comp, new in patterns: | ||
| s = comp.sub(new, s) | ||
| return s | ||
|
|
||
|
|
||
| def main(): | ||
| d1, d2 = sys.argv[1:] | ||
| d1, d2 = Path(d1), Path(d2) | ||
|
|
||
| with open("repls.json") as f: | ||
| repls = json.load(f) | ||
|
|
||
| patterns = [] | ||
| for r in repls: | ||
| try: | ||
| c = re.compile(r["Old"]) | ||
| patterns.append((c, r["New"])) | ||
| except re.error as e: | ||
| print(f"Regex error for pattern {r}: {e}", file=sys.stderr) | ||
|
|
||
| files1 = [str(p.relative_to(d1)) for p in d1.rglob("*") if p.is_file()] | ||
| files2 = [str(p.relative_to(d2)) for p in d2.rglob("*") if p.is_file()] | ||
|
|
||
| set1 = set(files1) | ||
| set2 = set(files2) | ||
|
|
||
| for f in sorted(set1 | set2): | ||
| p1 = d1 / f | ||
| p2 = d2 / f | ||
| if f not in set2: | ||
| print(f"Only in {d1}: {f}") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you make it a bit more explicit phrasing? Something like "File X is found only in Y directory". When I was reading test output it was not immediately clear what's going on
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This follows "diff -r" phrasing. If you're familiar with that, it makes sense. |
||
| elif f not in set1: | ||
| print(f"Only in {d2}: {f}") | ||
| else: | ||
| a = [replaceAll(patterns, x) for x in p1.read_text().splitlines(True)] | ||
| b = [replaceAll(patterns, x) for x in p2.read_text().splitlines(True)] | ||
| if a != b: | ||
| p1_str = p1.as_posix() | ||
| p2_str = p2.as_posix() | ||
| for line in difflib.unified_diff(a, b, p1_str, p2_str, "", "", 2): | ||
| print(line, end="") | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| main() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| Hello! | ||
| { | ||
| "id": "[USERID]", | ||
| "userName": "[USERNAME]" | ||
| } | ||
|
|
||
| Footer |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| Hello! | ||
| { | ||
| "id": "[UUID]", | ||
| "userName": "[USERNAME]" | ||
| } | ||
|
|
||
| Footer |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
|
|
||
| >>> diff.py out_dir_a out_dir_b | ||
| Only in out_dir_a: only_in_a | ||
| Only in out_dir_b: only_in_b | ||
| --- out_dir_a/output.txt | ||
| +++ out_dir_b/output.txt | ||
| @@ -1,5 +1,5 @@ | ||
| Hello! | ||
| { | ||
| - "id": "[USERID]", | ||
| + "id": "[UUID]", | ||
| "userName": "[USERNAME]" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| mkdir out_dir_a | ||
| mkdir out_dir_b | ||
|
|
||
| touch out_dir_a/only_in_a | ||
| touch out_dir_b/only_in_b | ||
|
|
||
| echo Hello! >> out_dir_a/output.txt | ||
| echo Hello! >> out_dir_b/output.txt | ||
|
|
||
| curl -s $DATABRICKS_HOST/api/2.0/preview/scim/v2/Me >> out_dir_a/output.txt | ||
| printf "\n\nFooter" >> out_dir_a/output.txt | ||
| printf '{\n "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt | ||
|
|
||
| # Unlike regular diff, diff.py will apply replacements first before doing the comparison | ||
| errcode trace diff.py out_dir_a out_dir_b | ||
|
|
||
| rm out_dir_a/only_in_a out_dir_b/only_in_b |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| LocalOnly = true |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it make sense to make this configurable, maybe in the future?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can discuss it once we have use case for it.