@@ -40,6 +40,7 @@ class AccuracyTestResult:
4040 baseline_accuracy : float
4141 error : Optional [str ]
4242 latency : Optional [float ] = None
43+ variant : Optional [str ] = None
4344
4445
4546def write_accuracy_github_summary (
@@ -54,16 +55,18 @@ def write_accuracy_github_summary(
5455 dataset: Dataset name used for evaluation
5556 results: List of AccuracyTestResult objects
5657 """
57- summary = f"## { test_name } - Accuracy ({ dataset } )\n "
58- summary += "| model | status | score | baseline | error |\n "
59- summary += "| ----- | ------ | ----- | -------- | ----- |\n "
58+ summary = f"#### { test_name } - Accuracy ({ dataset } )\n "
59+ summary += "| config | status | score | baseline | error |\n "
60+ summary += "| ------ | ------ | ----- | -------- | ----- |\n "
6061
6162 for result in results :
6263 status_emoji = "✅" if result .passed else "❌"
6364 score_str = f"{ result .score :.4f} " if result .score is not None else "N/A"
6465 baseline_str = f"{ result .baseline_accuracy :.4f} "
6566 error_str = result .error if result .error else "-"
66- summary += f"| { result .model } | { status_emoji } | { score_str } | { baseline_str } | { error_str } |\n "
67+ # Use variant name if available, otherwise use model path
68+ config_name = result .variant if result .variant else result .model
69+ summary += f"| { config_name } | { status_emoji } | { score_str } | { baseline_str } | { error_str } |\n "
6770
6871 write_github_step_summary (summary )
6972
@@ -239,6 +242,7 @@ def run_accuracy_test(
239242 score = None ,
240243 baseline_accuracy = params .baseline_accuracy ,
241244 error = error ,
245+ variant = model .variant ,
242246 )
243247
244248 # Validate against baseline
@@ -265,4 +269,5 @@ def run_accuracy_test(
265269 baseline_accuracy = params .baseline_accuracy ,
266270 error = error if not passed else None ,
267271 latency = latency ,
272+ variant = model .variant ,
268273 )
0 commit comments