From b63b64d9f1141b6441f1ca241eb60a41569bf2cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20Harboe?= Date: Mon, 7 Jul 2025 13:40:19 +0200 Subject: [PATCH] logging: log hash alongside elapsed to help find divergent results MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Øyvind Harboe --- flow/test/test_genElapsedTime.py | 6 +++--- flow/util/genElapsedTime.py | 28 ++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/flow/test/test_genElapsedTime.py b/flow/test/test_genElapsedTime.py index e6108cb093..36da00209e 100755 --- a/flow/test/test_genElapsedTime.py +++ b/flow/test/test_genElapsedTime.py @@ -30,7 +30,7 @@ def test_elapsed_time(self, mock_stdout): genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"]) # check if output is correct expected_output = ( - self.tmp_dir.name + "\n1_test 5400 9440\nTotal 5400 9440\n" + self.tmp_dir.name + "\n1_test 5400 9440 N/A\nTotal 5400 9440\n" ).split() actual_output = mock_stdout.getvalue().split() self.assertEqual(actual_output, expected_output) @@ -44,7 +44,7 @@ def test_zero_time(self, mock_stdout): # call the script with the test log file genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"]) expected_output = ( - self.tmp_dir.name + "\n1_test 74 9440\nTotal 74 9440\n" + self.tmp_dir.name + "\n1_test 74 9440 N/A\nTotal 74 9440\n" ).split() actual_output = mock_stdout.getvalue().split() self.assertEqual(actual_output, expected_output) @@ -61,7 +61,7 @@ def test_elapsed_time_longer_duration(self, mock_stdout): genElapsedTime.scan_logs(["--logDir", str(self.tmp_dir.name), "--noHeader"]) # check if output is correct expected_output = ( - self.tmp_dir.name + "\n1_test 744 9440 Total 744 9440" + self.tmp_dir.name + "\n1_test 744 9440 N/A Total 744 9440" ).split() actual_output = mock_stdout.getvalue().split() self.assertEqual(actual_output, expected_output) diff --git a/flow/util/genElapsedTime.py b/flow/util/genElapsedTime.py index 4a1159d105..3c9d0c7cf9 100755 --- a/flow/util/genElapsedTime.py +++ b/flow/util/genElapsedTime.py @@ -4,9 +4,10 @@ # in the flow and prints it in a table # --------------------------------------------------------------------------- +import argparse +import hashlib import pathlib import os -import argparse # argument parsing import sys # Parse and validate arguments @@ -61,15 +62,33 @@ def print_log_dir_times(logdir, args): int(line.split("Peak memory: ")[1].split("KB")[0]) / 1024 ) + # content hash for .odb file alongside .log file is useful to + # debug divergent results under what should be identical + # builds(such as local and CI builds) + odb_file = pathlib.Path( + str(f).replace("logs/", "results/").replace(".log", ".odb") + ) + if odb_file.exists(): + hasher = hashlib.sha1() + with open(odb_file, "rb") as odb_f: + while chunk := odb_f.read(16 * 1024 * 1024): + hasher.update(chunk) + odb_hash = hasher.hexdigest() + else: + odb_hash = "N/A" + if not found: print("No elapsed time found in", str(f), file=sys.stderr) continue # Print the name of the step and the corresponding elapsed time - format_str = "%-25s %20s %14s" + format_str = "%-25s %10s %14s %20s" if elapsedTime is not None and peak_memory is not None: if first and not args.noHeader: - print(format_str % ("Log", "Elapsed seconds", "Peak Memory/MB")) + print( + format_str + % ("Log", "Elapsed/s", "Peak Memory/MB", "sha1sum .odb [0:20)") + ) first = False print( format_str @@ -77,13 +96,14 @@ def print_log_dir_times(logdir, args): os.path.splitext(os.path.basename(str(f)))[0], elapsedTime, peak_memory, + odb_hash[0:20], ) ) totalElapsed += elapsedTime total_max_memory = max(total_max_memory, int(peak_memory)) if totalElapsed != 0: - print(format_str % ("Total", totalElapsed, total_max_memory)) + print(format_str % ("Total", totalElapsed, total_max_memory, "")) def scan_logs(args):