Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions acceptance/acceptance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ const (
CleanupScript = "script.cleanup"
PrepareScript = "script.prepare"
MaxFileSize = 100_000
// Filename to save replacements to (used by diff.py)
ReplsFile = "repls.json"
)

var Scripts = map[string]bool{
Expand All @@ -65,6 +67,10 @@ var Scripts = map[string]bool{
PrepareScript: true,
}

var Ignored = map[string]bool{
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to make this configurable, maybe in the future?

Copy link
Copy Markdown
Contributor Author

@denik denik Feb 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can discuss it once we have use case for it.

ReplsFile: true,
}

func TestAccept(t *testing.T) {
testAccept(t, InprocessMode, SingleTest)
}
Expand Down Expand Up @@ -152,6 +158,8 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int {
testdiff.PrepareReplacementSdkVersion(t, &repls)
testdiff.PrepareReplacementsGoVersion(t, &repls)

repls.SetPath(cwd, "[TESTROOT]")

repls.Repls = append(repls.Repls, testdiff.Replacement{Old: regexp.MustCompile("dbapi[0-9a-f]+"), New: "[DATABRICKS_TOKEN]"})

testDirs := getTests(t)
Expand Down Expand Up @@ -310,6 +318,11 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
// User replacements come last:
repls.Repls = append(repls.Repls, config.Repls...)

// Save replacements to temp test directory so that it can be read by diff.py
replsJson, err := json.MarshalIndent(repls.Repls, "", " ")
require.NoError(t, err)
testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson))

if coverDir != "" {
// Creating individual coverage directory for each test, because writing to the same one
// results in sporadic failures like this one (only if tests are running in parallel):
Expand All @@ -320,6 +333,10 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
cmd.Env = append(cmd.Env, "GOCOVERDIR="+coverDir)
}

absDir, err := filepath.Abs(dir)
require.NoError(t, err)
cmd.Env = append(cmd.Env, "TESTDIR="+absDir)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't absDir value be in quotes? It might contain spaces at least on Windows

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This passes on Windows CI and it's also not the only directory we have, so judging from practice, no. Unless you have a Windows machine where it does not work without quotes?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As long as it passes on Windows CI it's fine I guess


// Write combined output to a file
out, err := os.Create(filepath.Join(tmpDir, "output.txt"))
require.NoError(t, err)
Expand Down Expand Up @@ -368,6 +385,9 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
if _, ok := outputs[relPath]; ok {
continue
}
if _, ok := Ignored[relPath]; ok {
continue
}
unexpected = append(unexpected, relPath)
if strings.HasPrefix(relPath, "out") {
// We have a new file starting with "out"
Expand Down
56 changes: 56 additions & 0 deletions acceptance/bin/diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python3
"""This script implements "diff -r -U2 dir1 dir2" but applies replacements first"""
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you have a chance to measure performance difference between running this test with diff and with repls.json writing + diff.py?

Copy link
Copy Markdown
Contributor Author

@denik denik Feb 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand -- diff is doing the wrong thing there, e.g. it will look at actual username rather than [USERNAME] and detect that as a difference. So performance is not important because they are not equivalent, functionality-wise.

We can bench diff.py against diff but even if it's 2x slower there is no action to take, because we use diff.py for handling replacements.

That's said I don't think diffing will be a bottleneck in your tests, so it makes sense to prefer diff.py because it has less cross-platform quirks than diff.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This relates to discussion in the other thread about potentially replacing the output without storing repl.json file and using diff instead.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But since it's not really possible to use diff anyway, performance here indeed doesn't really matter


import sys
import difflib
import json
import re
from pathlib import Path


def replaceAll(patterns, s):
for comp, new in patterns:
s = comp.sub(new, s)
return s


def main():
d1, d2 = sys.argv[1:]
d1, d2 = Path(d1), Path(d2)

with open("repls.json") as f:
repls = json.load(f)

patterns = []
for r in repls:
try:
c = re.compile(r["Old"])
patterns.append((c, r["New"]))
except re.error as e:
print(f"Regex error for pattern {r}: {e}", file=sys.stderr)

files1 = [str(p.relative_to(d1)) for p in d1.rglob("*") if p.is_file()]
files2 = [str(p.relative_to(d2)) for p in d2.rglob("*") if p.is_file()]

set1 = set(files1)
set2 = set(files2)

for f in sorted(set1 | set2):
p1 = d1 / f
p2 = d2 / f
if f not in set2:
print(f"Only in {d1}: {f}")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make it a bit more explicit phrasing? Something like "File X is found only in Y directory". When I was reading test output it was not immediately clear what's going on

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This follows "diff -r" phrasing. If you're familiar with that, it makes sense.

elif f not in set1:
print(f"Only in {d2}: {f}")
else:
a = [replaceAll(patterns, x) for x in p1.read_text().splitlines(True)]
b = [replaceAll(patterns, x) for x in p2.read_text().splitlines(True)]
if a != b:
p1_str = p1.as_posix()
p2_str = p2.as_posix()
for line in difflib.unified_diff(a, b, p1_str, p2_str, "", "", 2):
print(line, end="")


if __name__ == "__main__":
main()
7 changes: 7 additions & 0 deletions acceptance/selftest/diff/out_dir_a/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Hello!
{
"id": "[USERID]",
"userName": "[USERNAME]"
}

Footer
7 changes: 7 additions & 0 deletions acceptance/selftest/diff/out_dir_b/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Hello!
{
"id": "[UUID]",
"userName": "[USERNAME]"
}

Footer
13 changes: 13 additions & 0 deletions acceptance/selftest/diff/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

>>> diff.py out_dir_a out_dir_b
Only in out_dir_a: only_in_a
Only in out_dir_b: only_in_b
--- out_dir_a/output.txt
+++ out_dir_b/output.txt
@@ -1,5 +1,5 @@
Hello!
{
- "id": "[USERID]",
+ "id": "[UUID]",
"userName": "[USERNAME]"
}
17 changes: 17 additions & 0 deletions acceptance/selftest/diff/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
mkdir out_dir_a
mkdir out_dir_b

touch out_dir_a/only_in_a
touch out_dir_b/only_in_b

echo Hello! >> out_dir_a/output.txt
echo Hello! >> out_dir_b/output.txt

curl -s $DATABRICKS_HOST/api/2.0/preview/scim/v2/Me >> out_dir_a/output.txt
printf "\n\nFooter" >> out_dir_a/output.txt
printf '{\n "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt

# Unlike regular diff, diff.py will apply replacements first before doing the comparison
errcode trace diff.py out_dir_a out_dir_b

rm out_dir_a/only_in_a out_dir_b/only_in_b
1 change: 1 addition & 0 deletions acceptance/selftest/test.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
LocalOnly = true