Skip to content

Commit b1697a8

Browse files
committed
Match benchmark comparisons by case name
1 parent 6bfdd03 commit b1697a8

1 file changed

Lines changed: 142 additions & 9 deletions

File tree

crates/izwi-cli/src/commands/bench.rs

Lines changed: 142 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use chrono::{DateTime, Utc};
88
use futures::{stream, StreamExt};
99
use indicatif::{ProgressBar, ProgressStyle};
1010
use serde::{Deserialize, Serialize};
11+
use std::collections::{BTreeMap, BTreeSet};
1112
use std::path::Path;
1213
use std::sync::Arc;
1314
use std::time::Instant;
@@ -441,25 +442,34 @@ async fn bench_compare(
441442

442443
let current_value = read_json_report(current_path).await?;
443444
let baseline_value = read_json_report(baseline_path).await?;
444-
let current_reports = report_entries(&current_value)?;
445-
let baseline_reports = report_entries(&baseline_value)?;
445+
let current_reports = report_entry_map(report_entries(&current_value)?, "Current")?;
446+
let baseline_reports = report_entry_map(report_entries(&baseline_value)?, "Baseline")?;
446447
if current_reports.len() != baseline_reports.len() {
447448
return Err(CliError::InvalidInput(format!(
448449
"Report shape mismatch: current has {} case(s), baseline has {} case(s)",
449450
current_reports.len(),
450451
baseline_reports.len()
451452
)));
452453
}
454+
let current_names: BTreeSet<_> = current_reports.keys().cloned().collect();
455+
let baseline_names: BTreeSet<_> = baseline_reports.keys().cloned().collect();
456+
if current_names != baseline_names {
457+
let only_current: Vec<_> = current_names.difference(&baseline_names).cloned().collect();
458+
let only_baseline: Vec<_> = baseline_names.difference(&current_names).cloned().collect();
459+
return Err(CliError::InvalidInput(format!(
460+
"Report case mismatch: only in current: {}; only in baseline: {}",
461+
format_case_list(&only_current),
462+
format_case_list(&only_baseline)
463+
)));
464+
}
453465

454466
let mut checks = Vec::new();
455-
for (current, baseline) in current_reports.iter().zip(baseline_reports.iter()) {
456-
let case = if current.name == baseline.name {
457-
current.name.clone()
458-
} else {
459-
format!("{} vs {}", current.name, baseline.name)
460-
};
467+
for (case, current) in &current_reports {
468+
let baseline = baseline_reports
469+
.get(case)
470+
.expect("case set equality should guarantee baseline entry");
461471
collect_metric_comparisons(
462-
&case,
472+
case,
463473
&current.summary,
464474
&baseline.summary,
465475
tolerance_percent,
@@ -496,6 +506,30 @@ async fn bench_compare(
496506
Ok(())
497507
}
498508

509+
fn report_entry_map(
510+
entries: Vec<ReportEntry>,
511+
label: &str,
512+
) -> Result<BTreeMap<String, ReportEntry>> {
513+
let mut map = BTreeMap::new();
514+
for entry in entries {
515+
let name = entry.name.clone();
516+
if map.insert(name.clone(), entry).is_some() {
517+
return Err(CliError::InvalidInput(format!(
518+
"{label} report has duplicate benchmark case name `{name}`"
519+
)));
520+
}
521+
}
522+
Ok(map)
523+
}
524+
525+
fn format_case_list(cases: &[String]) -> String {
526+
if cases.is_empty() {
527+
"none".to_string()
528+
} else {
529+
cases.join(", ")
530+
}
531+
}
532+
499533
async fn read_json_report(path: &Path) -> Result<serde_json::Value> {
500534
let text = tokio::fs::read_to_string(path)
501535
.await
@@ -2282,6 +2316,7 @@ fn print_runtime_delta(
22822316
#[cfg(test)]
22832317
mod tests {
22842318
use super::*;
2319+
use tempfile::tempdir;
22852320

22862321
#[test]
22872322
fn chat_stream_event_records_first_delta_and_terminal_usage() {
@@ -2321,4 +2356,102 @@ mod tests {
23212356
assert_eq!(sample.generation_time_ms, Some(123.0));
23222357
assert!(sample.ttft_ms >= 0.0);
23232358
}
2359+
2360+
#[tokio::test]
2361+
async fn compare_matches_suite_cases_by_name_not_order() {
2362+
let dir = tempdir().expect("temp dir should be created");
2363+
let baseline = dir.path().join("baseline.json");
2364+
let current = dir.path().join("current.json");
2365+
2366+
let baseline_report = serde_json::json!({
2367+
"reports": [
2368+
{
2369+
"name": "fast-case",
2370+
"report": {
2371+
"summary": {
2372+
"latency_ms": { "p95": 100.0 }
2373+
}
2374+
}
2375+
},
2376+
{
2377+
"name": "slow-case",
2378+
"report": {
2379+
"summary": {
2380+
"latency_ms": { "p95": 1000.0 }
2381+
}
2382+
}
2383+
}
2384+
]
2385+
});
2386+
let current_report = serde_json::json!({
2387+
"reports": [
2388+
{
2389+
"name": "slow-case",
2390+
"report": {
2391+
"summary": {
2392+
"latency_ms": { "p95": 1000.0 }
2393+
}
2394+
}
2395+
},
2396+
{
2397+
"name": "fast-case",
2398+
"report": {
2399+
"summary": {
2400+
"latency_ms": { "p95": 100.0 }
2401+
}
2402+
}
2403+
}
2404+
]
2405+
});
2406+
std::fs::write(
2407+
&baseline,
2408+
serde_json::to_vec(&baseline_report).expect("baseline should serialize"),
2409+
)
2410+
.expect("baseline should be written");
2411+
std::fs::write(
2412+
&current,
2413+
serde_json::to_vec(&current_report).expect("current should serialize"),
2414+
)
2415+
.expect("current should be written");
2416+
2417+
bench_compare(
2418+
&current,
2419+
&baseline,
2420+
5.0,
2421+
&BenchOptions {
2422+
output_format: OutputFormat::Json,
2423+
quiet: true,
2424+
},
2425+
)
2426+
.await
2427+
.expect("reordered same-name suite cases should compare successfully");
2428+
}
2429+
2430+
#[test]
2431+
fn duplicate_suite_case_names_are_rejected() {
2432+
let report = serde_json::json!({
2433+
"reports": [
2434+
{
2435+
"name": "duplicate",
2436+
"report": {
2437+
"summary": {
2438+
"latency_ms": { "p95": 100.0 }
2439+
}
2440+
}
2441+
},
2442+
{
2443+
"name": "duplicate",
2444+
"report": {
2445+
"summary": {
2446+
"latency_ms": { "p95": 101.0 }
2447+
}
2448+
}
2449+
}
2450+
]
2451+
});
2452+
2453+
let entries = report_entries(&report).expect("suite entries should parse");
2454+
let err = report_entry_map(entries, "Current").expect_err("duplicates should fail");
2455+
assert!(format!("{err}").contains("duplicate benchmark case name `duplicate`"));
2456+
}
23242457
}

0 commit comments

Comments
 (0)