@@ -8,6 +8,7 @@ use chrono::{DateTime, Utc};
88use futures:: { stream, StreamExt } ;
99use indicatif:: { ProgressBar , ProgressStyle } ;
1010use serde:: { Deserialize , Serialize } ;
11+ use std:: collections:: { BTreeMap , BTreeSet } ;
1112use std:: path:: Path ;
1213use std:: sync:: Arc ;
1314use std:: time:: Instant ;
@@ -441,25 +442,34 @@ async fn bench_compare(
441442
442443 let current_value = read_json_report ( current_path) . await ?;
443444 let baseline_value = read_json_report ( baseline_path) . await ?;
444- let current_reports = report_entries ( & current_value) ?;
445- let baseline_reports = report_entries ( & baseline_value) ?;
445+ let current_reports = report_entry_map ( report_entries ( & current_value) ? , "Current" ) ?;
446+ let baseline_reports = report_entry_map ( report_entries ( & baseline_value) ? , "Baseline" ) ?;
446447 if current_reports. len ( ) != baseline_reports. len ( ) {
447448 return Err ( CliError :: InvalidInput ( format ! (
448449 "Report shape mismatch: current has {} case(s), baseline has {} case(s)" ,
449450 current_reports. len( ) ,
450451 baseline_reports. len( )
451452 ) ) ) ;
452453 }
454+ let current_names: BTreeSet < _ > = current_reports. keys ( ) . cloned ( ) . collect ( ) ;
455+ let baseline_names: BTreeSet < _ > = baseline_reports. keys ( ) . cloned ( ) . collect ( ) ;
456+ if current_names != baseline_names {
457+ let only_current: Vec < _ > = current_names. difference ( & baseline_names) . cloned ( ) . collect ( ) ;
458+ let only_baseline: Vec < _ > = baseline_names. difference ( & current_names) . cloned ( ) . collect ( ) ;
459+ return Err ( CliError :: InvalidInput ( format ! (
460+ "Report case mismatch: only in current: {}; only in baseline: {}" ,
461+ format_case_list( & only_current) ,
462+ format_case_list( & only_baseline)
463+ ) ) ) ;
464+ }
453465
454466 let mut checks = Vec :: new ( ) ;
455- for ( current, baseline) in current_reports. iter ( ) . zip ( baseline_reports. iter ( ) ) {
456- let case = if current. name == baseline. name {
457- current. name . clone ( )
458- } else {
459- format ! ( "{} vs {}" , current. name, baseline. name)
460- } ;
467+ for ( case, current) in & current_reports {
468+ let baseline = baseline_reports
469+ . get ( case)
470+ . expect ( "case set equality should guarantee baseline entry" ) ;
461471 collect_metric_comparisons (
462- & case,
472+ case,
463473 & current. summary ,
464474 & baseline. summary ,
465475 tolerance_percent,
@@ -496,6 +506,30 @@ async fn bench_compare(
496506 Ok ( ( ) )
497507}
498508
509+ fn report_entry_map (
510+ entries : Vec < ReportEntry > ,
511+ label : & str ,
512+ ) -> Result < BTreeMap < String , ReportEntry > > {
513+ let mut map = BTreeMap :: new ( ) ;
514+ for entry in entries {
515+ let name = entry. name . clone ( ) ;
516+ if map. insert ( name. clone ( ) , entry) . is_some ( ) {
517+ return Err ( CliError :: InvalidInput ( format ! (
518+ "{label} report has duplicate benchmark case name `{name}`"
519+ ) ) ) ;
520+ }
521+ }
522+ Ok ( map)
523+ }
524+
525+ fn format_case_list ( cases : & [ String ] ) -> String {
526+ if cases. is_empty ( ) {
527+ "none" . to_string ( )
528+ } else {
529+ cases. join ( ", " )
530+ }
531+ }
532+
499533async fn read_json_report ( path : & Path ) -> Result < serde_json:: Value > {
500534 let text = tokio:: fs:: read_to_string ( path)
501535 . await
@@ -2282,6 +2316,7 @@ fn print_runtime_delta(
22822316#[ cfg( test) ]
22832317mod tests {
22842318 use super :: * ;
2319+ use tempfile:: tempdir;
22852320
22862321 #[ test]
22872322 fn chat_stream_event_records_first_delta_and_terminal_usage ( ) {
@@ -2321,4 +2356,102 @@ mod tests {
23212356 assert_eq ! ( sample. generation_time_ms, Some ( 123.0 ) ) ;
23222357 assert ! ( sample. ttft_ms >= 0.0 ) ;
23232358 }
2359+
2360+ #[ tokio:: test]
2361+ async fn compare_matches_suite_cases_by_name_not_order ( ) {
2362+ let dir = tempdir ( ) . expect ( "temp dir should be created" ) ;
2363+ let baseline = dir. path ( ) . join ( "baseline.json" ) ;
2364+ let current = dir. path ( ) . join ( "current.json" ) ;
2365+
2366+ let baseline_report = serde_json:: json!( {
2367+ "reports" : [
2368+ {
2369+ "name" : "fast-case" ,
2370+ "report" : {
2371+ "summary" : {
2372+ "latency_ms" : { "p95" : 100.0 }
2373+ }
2374+ }
2375+ } ,
2376+ {
2377+ "name" : "slow-case" ,
2378+ "report" : {
2379+ "summary" : {
2380+ "latency_ms" : { "p95" : 1000.0 }
2381+ }
2382+ }
2383+ }
2384+ ]
2385+ } ) ;
2386+ let current_report = serde_json:: json!( {
2387+ "reports" : [
2388+ {
2389+ "name" : "slow-case" ,
2390+ "report" : {
2391+ "summary" : {
2392+ "latency_ms" : { "p95" : 1000.0 }
2393+ }
2394+ }
2395+ } ,
2396+ {
2397+ "name" : "fast-case" ,
2398+ "report" : {
2399+ "summary" : {
2400+ "latency_ms" : { "p95" : 100.0 }
2401+ }
2402+ }
2403+ }
2404+ ]
2405+ } ) ;
2406+ std:: fs:: write (
2407+ & baseline,
2408+ serde_json:: to_vec ( & baseline_report) . expect ( "baseline should serialize" ) ,
2409+ )
2410+ . expect ( "baseline should be written" ) ;
2411+ std:: fs:: write (
2412+ & current,
2413+ serde_json:: to_vec ( & current_report) . expect ( "current should serialize" ) ,
2414+ )
2415+ . expect ( "current should be written" ) ;
2416+
2417+ bench_compare (
2418+ & current,
2419+ & baseline,
2420+ 5.0 ,
2421+ & BenchOptions {
2422+ output_format : OutputFormat :: Json ,
2423+ quiet : true ,
2424+ } ,
2425+ )
2426+ . await
2427+ . expect ( "reordered same-name suite cases should compare successfully" ) ;
2428+ }
2429+
2430+ #[ test]
2431+ fn duplicate_suite_case_names_are_rejected ( ) {
2432+ let report = serde_json:: json!( {
2433+ "reports" : [
2434+ {
2435+ "name" : "duplicate" ,
2436+ "report" : {
2437+ "summary" : {
2438+ "latency_ms" : { "p95" : 100.0 }
2439+ }
2440+ }
2441+ } ,
2442+ {
2443+ "name" : "duplicate" ,
2444+ "report" : {
2445+ "summary" : {
2446+ "latency_ms" : { "p95" : 101.0 }
2447+ }
2448+ }
2449+ }
2450+ ]
2451+ } ) ;
2452+
2453+ let entries = report_entries ( & report) . expect ( "suite entries should parse" ) ;
2454+ let err = report_entry_map ( entries, "Current" ) . expect_err ( "duplicates should fail" ) ;
2455+ assert ! ( format!( "{err}" ) . contains( "duplicate benchmark case name `duplicate`" ) ) ;
2456+ }
23242457}
0 commit comments